cvs2svn

   1 #!/usr/bin/env python
   2 # (Be in -*- python -*- mode.)
   3 #
   4 # cvs2svn: ...
   5 #
   6 # ====================================================================
   7 # Copyright (c) 2000-2004 CollabNet.  All rights reserved.
   8 #
   9 # This software is licensed as described in the file COPYING, which
  10 # you should have received as part of this distribution.  The terms
  11 # are also available at http://subversion.tigris.org/license-1.html.
  12 # If newer versions of this license are posted there, you may use a
  13 # newer version instead, at your option.
  14 #
  15 # This software consists of voluntary contributions made by many
  16 # individuals.  For exact contribution history, see the revision
  17 # history and logs, available at http://cvs2svn.tigris.org/.
  18 # ====================================================================
  19
# Version string of this script: 'r' followed by the Subversion
# revision number.  Once the keyword below is expanded to
# "$LastChangedRevision: NNN $", the slice [22:-2] drops the leading
# "$LastChangedRevision: " (22 characters) and the trailing " $".
# With the keyword unexpanded the slice is empty and VERSION is 'r'.
VERSION = 'r' + "$LastChangedRevision$"[22:-2]
  21
  22 import cvs2svn_rcsparse
  23 import os
  24 import sys
  25 import sha
  26 import re
  27 import time
  28 import fileinput
  29 import string
  30 import getopt
  31 import stat
  32 import md5
  33 import marshal
  34 import errno
  35 import popen2
  36 import types
  37 try:
  38   # Try to get access to a bunch of encodings for use with --encoding.
  39   # See http://cjkpython.i18n.org/ for details.
  40   import iconv_codec
  41 except ImportError:
  42   pass
  43
  44 # Warnings and errors start with these strings.  They are typically
  45 # followed by a colon and a space, as in "%s: " ==> "WARNING: ".
  46 warning_prefix = "WARNING"
  47 error_prefix = "ERROR"
  48
  49 # Make sure this Python is recent enough.
  50 if sys.hexversion < 0x2000000:
  51   sys.stderr.write("'%s: Python 2.0 or higher required, "
  52                    "see www.python.org.\n" % error_prefix)
  53   sys.exit(1)
  54
  55 # Pretend we have true booleans on older python versions
  56 try:
  57   True
  58 except:
  59   True = 1
  60   False = 0
  61
  62 # Opening pipes was a mess before Python 2.4, because some methods did
  63 # not exist on some platforms, and some behaved differenly on other.
  64 # Python 2.4 solved this by adding the subprocess module, but since we
  65 # cannot require such a new version, we cannot use it directly, but
  66 # must implement a simplified Popen using the best means neccessary.
  67 #
  68 # The SimplePopen class only has the following members and methods, all
  69 # behaving as documented in the subprocess.Popen class:
  70 #     - stdin
  71 #     - stdout
  72 #     - stderr
  73 #     - wait
  74 try:
  75   # First try subprocess.Popen...
  76   import subprocess
  77   class SimplePopen:
  78     def __init__(self, cmd, capture_stderr):
  79       if capture_stderr:
  80         stderr = subprocess.PIPE
  81       else:
  82         stderr = None
  83       self._popen = subprocess.Popen(cmd, stdin=subprocess.PIPE,
  84                                     stdout=subprocess.PIPE, stderr=stderr)
  85       self.stdin = self._popen.stdin
  86       self.stdout = self._popen.stdout
  87       if capture_stderr:
  88         self.stderr = self._popen.stderr
  89       self.wait = self._popen.wait
  90 except ImportError:
  91   if hasattr(popen2, 'Popen3'):
  92     # ...then try popen2.Popen3...
  93     class SimplePopen:
  94       def __init__(self, cmd, capture_stderr):
  95         self._popen3 = popen2.Popen3(cmd, capture_stderr)
  96         self.stdin = self._popen3.tochild
  97         self.stdout = self._popen3.fromchild
  98         if capture_stderr:
  99           self.stderr = self._popen3.childerr
 100         self.wait = self._popen3.wait
 101   else:
 102     # ...and if all fails, use popen2.popen3...
 103     class SimplePopen:
 104       def __init__(self, cmd, capture_stderr):
 105         if type(cmd) != types.StringType:
 106           cmd = argv_to_command_string(cmd)
 107         self.stdout, self.stdin, self.stderr = popen2.popen3(cmd, mode='b')
 108       def wait(self):
 109         return self.stdout.close() or self.stdin.close() or \
 110                self.stderr.close()
 111
# DBM module selection
#
# The three steps below run at import time, in this order, and adjust
# which backend anydbm will use for the databases opened later on.

# 1. If we have bsddb3, it is probably newer than bsddb.  Fake bsddb = bsddb3,
#    so that the dbhash module used by anydbm will use bsddb3.
try:
  import bsddb3
  sys.modules['bsddb'] = sys.modules['bsddb3']
except ImportError:
  pass

# 2. These DBM modules are not good for cvs2svn.  If anydbm would fall
#    back to one of them, report the problem and exit with status 1.
import anydbm
if (anydbm._defaultmod.__name__ == 'dumbdbm'
    or anydbm._defaultmod.__name__ == 'dbm'):
  sys.stderr.write(
    error_prefix
    + ': your installation of Python does not contain a suitable\n'
    + 'DBM module -- cvs2svn cannot continue.\n'
    + 'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n')
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then try to prefer gdbm
#    instead.  Unfortunately, gdbm appears not to be trouble free, either.
#    The old module is recognized by its bsddb attribute lacking a
#    __version__; if gdbm cannot be imported we only warn and carry on.
if hasattr(anydbm._defaultmod, 'bsddb') \
    and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    sys.stderr.write(warning_prefix +
        ': The version of the bsddb module found '
        'on your computer has been reported to malfunction on some datasets, '
        'causing KeyError exceptions. You may wish to upgrade your Python to '
        'version 2.3 or later.\n')
  else:
    anydbm._defaultmod = gdbm
 147
 148 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
 149 branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
 150 vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$')
 151
 152 SVN_KEYWORDS_VALUE = 'Author Date Id Revision'
 153
 154 # This really only matches standard '1.1.1.*'-style vendor revisions.
 155 # One could conceivably have a file whose default branch is 1.1.3 or
 156 # whatever, or was that at some point in time, with vendor revisions
 157 # 1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now (which
 158 # is the only time this regexp gets used), we'd have no basis for
 159 # assuming that the non-standard vendor branch had ever been the
 160 # default branch anyway, so we don't want this to match them anyway.
 161 vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9])+$')
 162
# If this run's output is a repository, then (in the tmpdir) we use
# a dumpfile of this name for repository loads.
#
# If this run's output is a dumpfile, then this is the default name of
# that dumpfile, but in the current directory (unless the user has
# specified a dumpfile path, of course, in which case it will be
# wherever the user said).
DUMPFILE = 'cvs2svn-dump'

# This file appears with different suffixes at different stages of
# processing.  CVS revisions are cleaned and sorted here, for commit
# grouping.  See design-notes.txt for details.
DATAFILE = 'cvs2svn-data'

# This file contains a marshalled copy of all the statistics that we
# gather throughout the various runs of cvs2svn.  The data is stored
# as a marshalled dictionary.
STATISTICS_FILE = 'cvs2svn-statistics'

# This text file contains records (1 per line) that describe svn
# filesystem paths that are the opening and closing source revisions
# for copies to tags and branches.  The format is as follows:
#
# SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
#
# Where type is either OPENING or CLOSING.  The SYMBOL_NAME and
# SVN_REVNUM are the primary and secondary sorting criteria for
# creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
# A sorted version of the above file.
SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'

# This file is a temporary file for storing symbolic_name -> closing
# CVSRevision until the end of our pass where we can look up the
# corresponding SVNRevNum for the closing revs and write these out to
# the SYMBOL_OPENINGS_CLOSINGS.
SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'

# Skeleton version of an svn filesystem.
# (These supersede and will eventually replace the two above.)
# See class SVNRepositoryMirror for how these work.
SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'

# Offsets pointing to the beginning of each SYMBOLIC_NAME in
# SYMBOL_OPENINGS_CLOSINGS_SORTED
SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'

# Maps CVSRevision.unique_key()s to lists of symbolic names, where
# the CVSRevision is the last such that is a source for those symbolic
# names.  For example, if branch B's number is 1.3.0.2 in this CVS
# file, and this file's 1.3 is the latest (by date) revision among
# *all* CVS files that is a source for branch B, then the
# CVSRevision.unique_key() corresponding to this file at 1.3 would
# list at least B in its list.
SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'

# Maps CVSRevision.unique_key() to corresponding line in s-revs.
###PERF Or, we could map to an offset into s-revs, instead of dup'ing
### the s-revs data in this database.
CVS_REVS_DB = 'cvs2svn-cvs-revs.db'

# Lists all symbolic names that are tags.  Keys are strings (symbolic
# names), values are ignorable.
TAGS_DB = 'cvs2svn-tags.db'

# A list of all tags.  Each line consists of the tag name and the
# number of files in which it exists, separated by a space.
TAGS_LIST = 'cvs2svn-tags.txt'

# A list of all branches.  The file is stored as a plain text file
# to make it easy to look at in an editor.  Each line contains the
# branch name, the number of files where the branch is created, the
# commit count, and a list of tags and branches that are defined on
# revisions in the branch.
BRANCHES_LIST = 'cvs2svn-branches.txt'

# These two databases provide a bidirectional mapping between
# CVSRevision.unique_key()s and Subversion revision numbers.
#
# The first maps CVSRevision.unique_key() to a number; the values are
# not unique.
#
# The second maps a number to a list of CVSRevision.unique_key()s.
CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'

# This database maps svn_revnums to tuples of (symbolic_name, date).
#
# The svn_revnums are the revision numbers of all non-primary
# SVNCommits.  No primary SVNCommit has a key in this database.
#
# The date is stored for all commits in this database.
#
# For commits that fill symbolic names, the symbolic_name is stored.
# For commits that are default branch syncs, the symbolic_name is None.
SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'

# This database maps svn_revnums of a default branch synchronization
# commit to the svn_revnum of the primary SVNCommit that motivated it.
#
# (NOTE: Secondary commits that fill branches and tags also have a
# motivating commit, but we do not record it because it is (currently)
# not needed for anything.)
#
# This mapping is used when generating the log message for the commit
# that synchronizes the default branch with trunk.
MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'

# How many bytes to read at a time from a pipe.  128 kiB should be
# large enough to be efficient without wasting too much memory.
PIPE_READ_SIZE = 128 * 1024

# Record the default RCS branches, if any, for CVS filepaths.
#
# The keys are CVS filepaths, relative to the top of the repository
# and with the ",v" stripped off, so they match the cvs paths used in
# Commit.commit().  The values are vendor branch revisions, such as
# '1.1.1.1', or '1.1.1.2', or '1.1.1.96'.  The vendor branch revision
# represents the highest vendor branch revision thought to have ever
# been head of the default branch.
#
# The reason we record a specific vendor revision, rather than a
# default branch number, is that there are two cases to handle:
#
# One case is simple.  The RCS file lists a default branch explicitly
# in its header, such as '1.1.1'.  In this case, we know that every
# revision on the vendor branch is to be treated as head of trunk at
# that point in time.
#
# But there's also a degenerate case.  The RCS file does not currently
# have a default branch, yet we can deduce that for some period in the
# past it probably *did* have one.  For example, the file has vendor
# revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
# and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2.  In this
# case, we should record 1.1.1.96 as the last vendor revision to have
# been the head of the default branch.
DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'

# Records the author and log message for each changeset.
# The keys are author+log digests, the same kind used to identify
# unique revisions in the .revs, etc files.  Each value is a tuple
# of two elements: '(author logmessage)'.
METADATA_DB = "cvs2svn-metadata.db"

# A temporary on-disk hash that maps CVSRevision unique keys to a new
# timestamp for that CVSRevision.  These new timestamps are created in
# pass2, and this hash is used exclusively in pass2.
TWEAKED_TIMESTAMPS_DB = "cvs2svn-fixed-timestamps.db"
 312
# Suffixes for files holding revision data at different stages of
# processing.
REVS_SUFFIX = '.revs'
CLEAN_REVS_SUFFIX = '.c-revs'
SORTED_REVS_SUFFIX = '.s-revs'
RESYNC_SUFFIX = '.resync'

# Sentinel for "no valid Subversion revision number".
SVN_INVALID_REVNUM = -1

COMMIT_THRESHOLD = 5 * 60       # flush a commit if a 5 minute gap occurs

# Things that can happen to a file.
OP_NOOP   = '-'
OP_ADD    = 'A'
OP_DELETE = 'D'
OP_CHANGE = 'C'

# A deltatext either does or doesn't represent some change.
DELTATEXT_NONEMPTY = 'N'
DELTATEXT_EMPTY    = 'E'

# NOTE(review): sha.digestsize * 2 is the length of a hex-encoded SHA
# digest; the leading 9 presumably accounts for a fixed-width field
# plus separator preceding the digest -- confirm against the code that
# writes the revs lines.
DIGEST_END_IDX = 9 + (sha.digestsize * 2)

# Constants used in SYMBOL_OPENINGS_CLOSINGS
OPENING = 'O'
CLOSING = 'C'
 337
 338 class FatalException(Exception):
 339   """Exception thrown on a non-recoverable error.
 340
 341   If this exception is thrown by main(), it is caught by the global
 342   layer of the program, its string representation is printed, and the
 343   program is ended with an exit code of 1."""
 344
 345   pass
 346
 347
 348 class FatalError(FatalException):
 349   """A FatalException that prepends error_prefix to the message."""
 350
 351   def __init__(self, msg):
 352     """Use (error_prefix + ': ' + MSG + '\n') as the error message."""
 353
 354     FatalException.__init__(self, '%s: %s\n' % (error_prefix, msg,))
 355
 356
 357 def temp(basename):
 358   """Return a path to BASENAME in Ctx().tmpdir.
 359   This is a convenience function to save horizontal space in source."""
 360   return os.path.join(Ctx().tmpdir, basename)
 361
 362 # Since the unofficial set also includes [/\] we need to translate those
 363 # into ones that don't conflict with Subversion limitations.
 364 def _clean_symbolic_name(name):
 365   """Return symbolic name NAME, translating characters that Subversion
 366   does not allow in a pathname."""
 367   name = name.replace('/','++')
 368   name = name.replace('\\','--')
 369   return name
 370
 371 def _path_join(*components):
 372   """Join two or more pathname COMPONENTS, inserting '/' as needed.
 373   Empty component are skipped."""
 374   return string.join(filter(None, components), '/')
 375
 376 def _path_split(path):
 377   """Split the svn pathname PATH into a pair, (HEAD, TAIL).
 378
 379   This is similar to os.path.split(), but always uses '/' as path
 380   separator.  PATH is an svn path, which should not start with a '/'.
 381   HEAD is everything before the last slash, and TAIL is everything
 382   after.  If PATH ends in a slash, TAIL will be empty.  If there is no
 383   slash in PATH, HEAD will be empty.  If PATH is empty, both HEAD and
 384   TAIL are empty."""
 385
 386   pos = path.rfind('/')
 387   if pos == -1:
 388     return ('', path,)
 389   else:
 390     return (path[:pos], path[pos+1:],)
 391
 392 def to_utf8(value, mode='replace'):
 393   """Encode (as Unicode) VALUE, trying the encodings in Ctx.encoding
 394   as valid source encodings.  Raise UnicodeError on failure of all
 395   source encodings."""
 396   ### FIXME: The 'replace' default mode should be an option,
 397   ### like --encoding is.
 398   for encoding in Ctx().encoding:
 399     try:
 400       return unicode(value, encoding, mode).encode('utf8')
 401     except UnicodeError:
 402       Log().write(LOG_VERBOSE, "Encoding '%s' failed for string '%s'"
 403                   % (encoding, value))
 404   raise UnicodeError
 405
 406 def run_command(command):
 407   if os.system(command):
 408     raise FatalError('Command failed: "%s"' % (command,))
 409
 410
 411 class CommandFailedException(Exception):
 412   """Exception raised if check_command_runs() fails."""
 413
 414   pass
 415
 416
 417 def check_command_runs(cmd, cmdname):
 418   """Check whether the command CMD can be executed without errors.
 419
 420   CMD is a list or string, as accepted by SimplePopen.  CMDNAME is the
 421   name of the command as it should be included in exception error
 422   messages.
 423
 424   This function checks three things: (1) the command can be run
 425   without throwing an OSError; (2) it exits with status=0; (3) it
 426   doesn't output anything to stderr.  If any of these conditions is
 427   not met, raise a CommandFailedException describing the problem."""
 428
 429   try:
 430     pipe = SimplePopen(cmd, True)
 431   except OSError, e:
 432     raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
 433   pipe.stdin.close()
 434   pipe.stdout.read()
 435   errmsg = pipe.stderr.read()
 436   status = pipe.wait()
 437   if status != 0 or errmsg:
 438     msg = 'error executing %s: status %s' % (cmdname, status,)
 439     if errmsg:
 440       msg += ', error output:\n%s' % (errmsg,)
 441     raise CommandFailedException(msg)
 442
 443
 444 class CVSRepository:
 445   """A CVS repository from which data can be extracted."""
 446
 447   def __init__(self, cvs_repos_path):
 448     """CVS_REPOS_PATH is the top of the CVS repository (at least as
 449     far as this run is concerned)."""
 450
 451     if not os.path.isdir(cvs_repos_path):
 452       raise FatalError("The specified CVS repository path '%s' is not an "
 453                        "existing directory." % cvs_repos_path)
 454
 455     self.cvs_repos_path = os.path.normpath(cvs_repos_path)
 456
 457   def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
 458     """Return a command string, and the pipe created using that
 459     string.  C_REV is a CVSRevision.  If SUPPRESS_KEYWORD_SUBSTITUTION
 460     is True, then suppress the substitution of RCS/CVS keywords in the
 461     output.  The pipe returns the text of that CVS Revision."""
 462     raise NotImplementedError
 463
 464
 465 class CVSRepositoryViaRCS(CVSRepository):
 466   """A CVSRepository accessed via RCS."""
 467
 468   def __init__(self, cvs_repos_path):
 469     CVSRepository.__init__(self, cvs_repos_path)
 470     try:
 471       check_command_runs([ 'co', '-V' ], 'co')
 472     except CommandFailedException, e:
 473       raise FatalError('%s\n'
 474                        'Please check that co is installed and in your PATH\n'
 475                        '(it is a part of the RCS software).' % (e,))
 476
 477   def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
 478     pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
 479     if suppress_keyword_substitution:
 480       pipe_cmd.append('-kk')
 481     pipe_cmd.append(c_rev.rcs_path())
 482     pipe = SimplePopen(pipe_cmd, True)
 483     pipe.stdin.close()
 484     return pipe_cmd, pipe
 485
 486
 487 class CVSRepositoryViaCVS(CVSRepository):
 488   """A CVSRepository accessed via CVS."""
 489
 490   def __init__(self, cvs_repos_path):
 491     CVSRepository.__init__(self, cvs_repos_path)
 492     # Ascend above the specified root if necessary, to find the
 493     # cvs_repository_root (a directory containing a CVSROOT directory)
 494     # and the cvs_module (the path of the conversion root within the
 495     # cvs repository) NB: cvs_module must be seperated by '/' *not* by
 496     # os.sep .
 497     self.cvs_repository_root = os.path.abspath(self.cvs_repos_path)
 498     prev_cvs_repository_root = None
 499     self.cvs_module = ""
 500     while prev_cvs_repository_root != self.cvs_repository_root:
 501       if os.path.isdir(os.path.join(self.cvs_repository_root, 'CVSROOT')):
 502         break
 503       prev_cvs_repository_root = self.cvs_repository_root
 504       self.cvs_repository_root, module_component = \
 505           os.path.split(self.cvs_repository_root)
 506       self.cvs_module = module_component + "/" + self.cvs_module
 507     else:
 508       # Hit the root (of the drive, on Windows) without finding a CVSROOT dir.
 509       raise FatalError("the path '%s' is not a CVS repository, nor a path "
 510                        "within a CVS repository.  A CVS repository contains "
 511                        "a CVSROOT directory within its root directory."
 512                        % (self.cvs_repos_path,))
 513     os.environ['CVSROOT'] = self.cvs_repository_root
 514
 515     def cvs_ok(global_arguments):
 516       check_command_runs(
 517         [ 'cvs' ] + global_arguments + [ '--version' ], 'cvs')
 518
 519     self.global_arguments = [ "-q", "-R" ]
 520     try:
 521       cvs_ok(self.global_arguments)
 522     except CommandFailedException, e:
 523       self.global_arguments = [ "-q" ]
 524       try:
 525         cvs_ok(self.global_arguments)
 526       except CommandFailedException, e:
 527         raise FatalError(
 528             '%s\n'
 529             'Please check that cvs is installed and in your PATH.' % (e,))
 530
 531   def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
 532     pipe_cmd = [ 'cvs' ] + self.global_arguments + \
 533                [ 'co', '-r' + c_rev.rev, '-p' ]
 534     if suppress_keyword_substitution:
 535       pipe_cmd.append('-kk')
 536     pipe_cmd.append(self.cvs_module + c_rev.cvs_path)
 537     pipe = SimplePopen(pipe_cmd, True)
 538     pipe.stdin.close()
 539     return pipe_cmd, pipe
 540
 541
 542 def generate_ignores(c_rev):
 543   # Read in props
 544   pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
 545   buf = pipe.stdout.read(PIPE_READ_SIZE)
 546   raw_ignore_val = ""
 547   while buf:
 548     raw_ignore_val = raw_ignore_val + buf
 549     buf = pipe.stdout.read(PIPE_READ_SIZE)
 550   pipe.stdout.close()
 551   error_output = pipe.stderr.read()
 552   exit_status = pipe.wait()
 553   if exit_status:
 554     raise FatalError("The command '%s' failed with exit status: %s\n"
 555                      "and the following output:\n"
 556                      "%s" % (pipe_cmd, exit_status, error_output))
 557
 558   # Tweak props: First, convert any spaces to newlines...
 559   raw_ignore_val = '\n'.join(raw_ignore_val.split())
 560   raw_ignores = raw_ignore_val.split('\n')
 561   ignore_vals = [ ]
 562   for ignore in raw_ignores:
 563     # Reset the list if we encounter a '!'
 564     # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
 565     if ignore == '!':
 566       ignore_vals = [ ]
 567       continue
 568     # Skip empty lines
 569     if len(ignore) == 0:
 570       continue
 571     ignore_vals.append(ignore)
 572   return ignore_vals
 573
 574 # Return a string that has not been returned by gen_key() before.
 575 gen_key_base = 0L
 576 def gen_key():
 577   global gen_key_base
 578   key = '%x' % gen_key_base
 579   gen_key_base = gen_key_base + 1
 580   return key
 581
 582 # ============================================================================
 583 # This code is copied with a few modifications from:
 584 #   subversion/subversion/bindings/swig/python/svn/core.py
 585
 586 if sys.platform == "win32":
 587   _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')
 588
 589   def escape_shell_arg(arg):
 590     # The (very strange) parsing rules used by the C runtime library are
 591     # described at:
 592     # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp
 593
 594     # double up slashes, but only if they are followed by a quote character
 595     arg = re.sub(_escape_shell_arg_re, r'\1\1\2', arg)
 596
 597     # surround by quotes and escape quotes inside
 598     arg = '"' + string.replace(arg, '"', '"^""') + '"'
 599     return arg
 600
 601
 602   def argv_to_command_string(argv):
 603     """Flatten a list of command line arguments into a command string.
 604
 605     The resulting command string is expected to be passed to the system
 606     shell which os functions like popen() and system() invoke internally.
 607     """
 608
 609     # According cmd's usage notes (cmd /?), it parses the command line by
 610     # "seeing if the first character is a quote character and if so, stripping
 611     # the leading character and removing the last quote character."
 612     # So to prevent the argument string from being changed we add an extra set
 613     # of quotes around it here.
 614     return '"' + string.join(map(escape_shell_arg, argv), " ") + '"'
 615
 616 else:
 617   def escape_shell_arg(str):
 618     return "'" + string.replace(str, "'", "'\\''") + "'"
 619
 620   def argv_to_command_string(argv):
 621     """Flatten a list of command line arguments into a command string.
 622
 623     The resulting command string is expected to be passed to the system
 624     shell which os functions like popen() and system() invoke internally.
 625     """
 626
 627     return string.join(map(escape_shell_arg, argv), " ")
 628 # ============================================================================
 629
 630 def format_date(date):
 631   """Return an svn-compatible date string for DATE (seconds since epoch)."""
 632   # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
 633   return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", time.gmtime(date))
 634
 635 def sort_file(infile, outfile):
 636   # sort the log files
 637
 638   # GNU sort will sort our dates differently (incorrectly!) if our
 639   # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
 640   # it to 'C'
 641   lc_all_tmp = os.environ.get('LC_ALL', None)
 642   os.environ['LC_ALL'] = 'C'
 643   # The -T option to sort has a nice side effect.  The Win32 sort is
 644   # case insensitive and cannot be used, and since it does not
 645   # understand the -T option and dies if we try to use it, there is
 646   # no risk that we use that sort by accident.
 647   run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
 648   if lc_all_tmp is None:
 649     del os.environ['LC_ALL']
 650   else:
 651     os.environ['LC_ALL'] = lc_all_tmp
 652
 653 def match_regexp_list(regexp_list, string):
 654   """Test whether STRING matches any of the compiled regexps in
 655   REGEXP_LIST."""
 656   for regexp in regexp_list:
 657     if regexp.match(string):
 658       return True
 659   return False
 660
 661 class LF_EOL_Filter:
 662   """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
 663   into LFs only."""
 664   def __init__(self, stream):
 665     self.stream = stream
 666     self.carry_cr = False
 667     self.eof = False
 668
 669   def read(self, size):
 670     while True:
 671       buf = self.stream.read(size)
 672       self.eof = len(buf) == 0
 673       if self.carry_cr:
 674         buf = '\r' + buf
 675         self.carry_cr = False
 676       if not self.eof and buf[-1] == '\r':
 677         self.carry_cr = True
 678         buf = buf[:-1]
 679       buf = string.replace(buf, '\r\n', '\n')
 680       buf = string.replace(buf, '\r', '\n')
 681       if len(buf) > 0 or self.eof:
 682         return buf
 683
 684
 685 # These constants represent the log levels that this script supports
 686 LOG_WARN = -1
 687 LOG_QUIET = 0
 688 LOG_NORMAL = 1
 689 LOG_VERBOSE = 2
 690 class Log:
 691   """A Simple logging facility.  Each line will be timestamped is
 692   self.use_timestamps is TRUE.  This class is a Borg, see
 693   http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
 694   __shared_state = {}
 695   def __init__(self):
 696     self.__dict__ = self.__shared_state
 697     if self.__dict__:
 698       return
 699     self.log_level = LOG_NORMAL
 700     # Set this to true if you want to see timestamps on each line output.
 701     self.use_timestamps = None
 702     self.logger = sys.stdout
 703
 704   def _timestamp(self):
 705     """Output a detailed timestamp at the beginning of each line output."""
 706     self.logger.write(time.strftime('[%Y-%m-%d %I:%m:%S %Z] - '))
 707
 708   def write(self, log_level, *args):
 709     """This is the public method to use for writing to a file.  Only
 710     messages whose LOG_LEVEL is <= self.log_level will be printed.  If
 711     there are multiple ARGS, they will be separated by a space."""
 712     if log_level > self.log_level:
 713       return
 714     if self.use_timestamps:
 715       self._timestamp()
 716     self.logger.write(' '.join(map(str,args)) + "\n")
 717     # Ensure that log output doesn't get out-of-order with respect to
 718     # stderr output.
 719     self.logger.flush()
 720
 721
 722 class Cleanup:
 723   """This singleton class manages any files created by cvs2svn.  When
 724   you first create a file, call Cleanup.register, passing the
 725   filename, and the last pass that you need the file.  After the end
 726   of that pass, your file will be cleaned up after running an optional
 727   callback.  This class is a Borg, see
 728   http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
 729
 730   __shared_state = {}
 731   def __init__(self):
 732     self.__dict__ = self.__shared_state
 733     if self.__dict__:
 734       return
 735     self._log = {}
 736     self._callbacks = {}
 737
 738   def register(self, file, which_pass, callback=None):
 739     """Register FILE for cleanup at the end of WHICH_PASS, running
 740     function CALLBACK prior to removal.  Registering a given FILE is
 741     idempotent; you may register as many times as you wish, but it
 742     will only be cleaned up once.
 743
 744     Note that if a file is registered multiple times, only the first
 745     callback registered for that file will be called at cleanup
 746     time.  Also note that if you register a database file you must
 747     close the database before cleanup, e.g. using a callback."""
 748     self._log.setdefault(which_pass, {})[file] = 1
 749     if callback and not self._callbacks.has_key(file):
 750       self._callbacks[file] = callback
 751
 752   def cleanup(self, which_pass):
 753     """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
 754     if not self._log.has_key(which_pass):
 755       return
 756     for file in self._log[which_pass].keys():
 757       Log().write(LOG_VERBOSE, "Deleting", file)
 758       if self._callbacks.has_key(file):
 759         self._callbacks[file]()
 760       os.unlink(file)
 761
 762
 763 # Always use these constants for opening databases.
 764 DB_OPEN_READ = 'r'
 765 DB_OPEN_NEW = 'n'
 766
 767
 768 class AbstractDatabase:
 769   """An abstract base class for anydbm-based databases."""
 770
 771   def __init__(self, filename, mode):
 772     """A convenience function for opening an anydbm database."""
 773     # pybsddb3 has a bug which prevents it from working with
 774     # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
 775     # causes the DB_TRUNCATE flag to be passed, which is disallowed
 776     # for databases protected by lock and transaction support
 777     # (bsddb databases use locking from bsddb version 4.2.4 onwards).
 778     #
 779     # Therefore, manually perform the removal (we can do this, because
 780     # we know that for bsddb - but *not* anydbm in general - the database
 781     # consists of one file with the name we specify, rather than several
 782     # based on that name).
 783     if mode == 'n' and anydbm._defaultmod.__name__ == 'dbhash':
 784       if os.path.isfile(filename):
 785         os.unlink(filename)
 786       mode = 'c'
 787
 788     self.db = anydbm.open(filename, mode)
 789     self.has_key = self.db.has_key
 790     self.__delitem__ = self.db.__delitem__
 791
 792   def get(self, key, default=None):
 793     """bsddb3 doesn't have a get() method, so define one here."""
 794
 795     try:
 796       return self[key]
 797     except KeyError:
 798       return default
 799
 800
 801 class SDatabase(AbstractDatabase):
 802   """A database that can only store strings."""
 803
 804   def __getitem__(self, key):
 805     return self.db[key]
 806
 807   def __setitem__(self, key, value):
 808     self.db[key] = value
 809
 810
 811 class Database(AbstractDatabase):
 812   """A database that uses the marshal module to store built-in types."""
 813
 814   def __getitem__(self, key):
 815     return marshal.loads(self.db[key])
 816
 817   def __setitem__(self, key, value):
 818     self.db[key] = marshal.dumps(value)
 819
 820
 821 class StatsKeeper:
 822   __shared_state = { }
 823   def __init__(self):
 824     self.__dict__ = self.__shared_state
 825     if self.__dict__:
 826       return
 827     self.filename = temp(STATISTICS_FILE)
 828     Cleanup().register(self.filename, pass8)
 829     # This can get kinda large, so we don't store it in our data dict.
 830     self.repos_files = { }
 831
 832     if os.path.exists(self.filename):
 833       self.unarchive()
 834     else:
 835       self.data = { 'cvs_revs_count' : 0,
 836                     'tags': { },
 837                     'branches' : { },
 838                     'repos_size' : 0,
 839                     'repos_file_count' : 0,
 840                     'svn_rev_count' : None,
 841                     'first_rev_date' : 1L<<32,
 842                     'last_rev_date' : 0,
 843                     'pass_timings' : { },
 844                     'start_time' : 0,
 845                     'end_time' : 0,
 846                     }
 847
 848   def log_duration_for_pass(self, duration, pass_num):
 849     self.data['pass_timings'][pass_num] = duration
 850
 851   def set_start_time(self, start):
 852     self.data['start_time'] = start
 853
 854   def set_end_time(self, end):
 855     self.data['end_time'] = end
 856
 857   def _bump_item(self, key, amount=1):
 858     self.data[key] = self.data[key] + amount
 859
 860   def reset_c_rev_info(self):
 861     self.data['cvs_revs_count'] = 0
 862     self.data['tags'] = { }
 863     self.data['branches'] = { }
 864
 865   def record_c_rev(self, c_rev):
 866     self._bump_item('cvs_revs_count')
 867
 868     for tag in c_rev.tags:
 869       self.data['tags'][tag] = None
 870     for branch in c_rev.branches:
 871       self.data['branches'][branch] = None
 872
 873     if c_rev.timestamp < self.data['first_rev_date']:
 874       self.data['first_rev_date'] = c_rev.timestamp
 875
 876     if c_rev.timestamp > self.data['last_rev_date']:
 877       self.data['last_rev_date'] = c_rev.timestamp
 878
 879     # Only add the size if this is the first time we see the file.
 880     if not self.repos_files.has_key(c_rev.fname):
 881       self._bump_item('repos_size', c_rev.file_size)
 882     self.repos_files[c_rev.fname] = None
 883
 884     self.data['repos_file_count'] = len(self.repos_files)
 885
 886   def set_svn_rev_count(self, count):
 887     self.data['svn_rev_count'] = count
 888
 889   def svn_rev_count(self):
 890     return self.data['svn_rev_count']
 891
 892   def archive(self):
 893     open(self.filename, 'w').write(marshal.dumps(self.data))
 894
 895   def unarchive(self):
 896     self.data = marshal.loads(open(self.filename, 'r').read())
 897
 898   def __str__(self):
 899     svn_revs_str = ""
 900     if self.data['svn_rev_count'] is not None:
 901       svn_revs_str = ('Total SVN Commits:      %10s\n'
 902                       % self.data['svn_rev_count'])
 903
 904     return ('\n'                                \
 905             'cvs2svn Statistics:\n'             \
 906             '------------------\n'              \
 907             'Total CVS Files:        %10i\n'    \
 908             'Total CVS Revisions:    %10i\n'    \
 909             'Total Unique Tags:      %10i\n'    \
 910             'Total Unique Branches:  %10i\n'    \
 911             'CVS Repos Size in KB:   %10i\n'    \
 912             '%s'                                \
 913             'First Revision Date:    %s\n'      \
 914             'Last Revision Date:     %s\n'      \
 915             '------------------'                \
 916             % (self.data['repos_file_count'],
 917                self.data['cvs_revs_count'],
 918                len(self.data['tags']),
 919                len(self.data['branches']),
 920                (self.data['repos_size'] / 1024),
 921                svn_revs_str,
 922                time.ctime(self.data['first_rev_date']),
 923                time.ctime(self.data['last_rev_date']),
 924                ))
 925
 926   def timings(self):
 927     passes = self.data['pass_timings'].keys()
 928     passes.sort()
 929     str = 'Timings:\n------------------\n'
 930
 931     def desc(val):
 932       if val == 1: return "second"
 933       return "seconds"
 934
 935     for pass_num in passes:
 936       duration = int(self.data['pass_timings'][pass_num])
 937       p_str = ('pass %d:%6d %s\n'
 938                % (pass_num, duration, desc(duration)))
 939       str = str + p_str
 940
 941     total = int(self.data['end_time'] - self.data['start_time'])
 942     str = str + ('total: %6d %s' % (total, desc(total)))
 943     return str
 944
 945
 946 class LastSymbolicNameDatabase:
 947   """ Passing every CVSRevision in s-revs to this class will result in
 948   a Database whose key is the last CVS Revision a symbolicname was
 949   seen in, and whose value is a list of all symbolicnames that were
 950   last seen in that revision."""
 951   def __init__(self, mode):
 952     self.symbols = {}
 953     self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
 954     Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)
 955
 956   # Once we've gone through all the revs,
 957   # symbols.keys() will be a list of all tags and branches, and
 958   # their corresponding values will be a key into the last CVS revision
 959   # that they were used in.
 960   def log_revision(self, c_rev):
 961     # Gather last CVS Revision for symbolic name info and tag info
 962     for tag in c_rev.tags:
 963       self.symbols[tag] = c_rev.unique_key()
 964     if c_rev.op is not OP_DELETE:
 965       for branch in c_rev.branches:
 966         self.symbols[branch] = c_rev.unique_key()
 967
 968   # Creates an inversion of symbols above--a dictionary of lists (key
 969   # = CVS rev unique_key: val = list of symbols that close in that
 970   # rev.
 971   def create_database(self):
 972     for sym, rev_unique_key in self.symbols.items():
 973       ary = self.symbol_revs_db.get(rev_unique_key, [])
 974       ary.append(sym)
 975       self.symbol_revs_db[rev_unique_key] = ary
 976
 977
 978 class CVSRevisionDatabase:
 979   """A Database to store CVSRevision objects and retrieve them by their
 980   unique_key()."""
 981
 982   def __init__(self, mode):
 983     """Initialize an instance, opening database in MODE (like the MODE
 984     argument to Database or anydbm.open())."""
 985     self.cvs_revs_db = SDatabase(temp(CVS_REVS_DB), mode)
 986     Cleanup().register(temp(CVS_REVS_DB), pass8)
 987
 988   def log_revision(self, c_rev):
 989     """Add C_REV, a CVSRevision, to the database."""
 990     self.cvs_revs_db[c_rev.unique_key()] = str(c_rev)
 991
 992   def get_revision(self, unique_key):
 993     """Return the CVSRevision stored under UNIQUE_KEY."""
 994     return CVSRevision(Ctx(), self.cvs_revs_db[unique_key])
 995
 996
 997 def TagsDatabase(mode):
 998   """A Database to store which symbolic names are tags.
 999   Each key is a tag name.
1000   The value has no meaning, and should be set to None."""
1001   db = SDatabase(temp(TAGS_DB), mode)
1002   Cleanup().register(temp(TAGS_DB), pass8)
1003   return db
1004
1005
class Project:
  """A project within a CVS repository."""

  def __init__(self, cvs_root, trunk_path, branches_path, tags_path):
    """Create a new Project record.

    CVS_ROOT is the main CVS directory for this project (within the
    filesystem).  TRUNK_PATH, BRANCHES_PATH, and TAGS_PATH are the
    full, normalized directory names in svn for the corresponding part
    of the repository."""

    self.cvs_root = os.path.normpath(cvs_root)
    self.trunk_path = trunk_path
    self.branches_path = branches_path
    self.tags_path = tags_path
    verify_paths_disjoint(self.trunk_path, self.branches_path, self.tags_path)

  def is_source(self, svn_path):
    """Return True iff SVN_PATH is a legitimate source for this project.

    Legitimate paths are self.trunk_path or any directory directly
    under self.branches_path."""

    if svn_path == self.trunk_path:
      return True

    (head, tail,) = _path_split(svn_path)
    if head == self.branches_path:
      return True

    return False

  def is_unremovable(self, svn_path):
    """Return True iff the specified path must not be removed.

    That is the case for the trunk, branches and tags paths."""

    return svn_path in [self.trunk_path, self.branches_path, self.tags_path]

  def relative_name(self, fname):
    """Return the path to FNAME relative to cvs_root, with ',v' removed.

    FNAME is a filesystem name that has to begin (textually) with
    self.cvs_root and end with ',v'.  The result uses '/' as its
    separator whatever os.sep is.  Raise FatalError if FNAME does not
    have the required prefix or suffix."""

    if not fname.startswith(self.cvs_root):
      raise FatalError(
          "relative_name: '%s' is not a sub-path of '%s'"
          % (fname, self.cvs_root,))
    if not fname.endswith(',v'):
      raise FatalError("relative_name: '%s' does not end with ',v'"
                       % (fname,))
    l = len(self.cvs_root)
    # Skip the separator that follows cvs_root, if there is one.  A
    # slice is used instead of an index so that an FNAME that is
    # exactly cvs_root cannot raise IndexError.
    if fname[l:l + 1] == os.sep:
      l += 1
    return fname[l:-2].replace(os.sep, '/')

  def get_branch_path(self, branch_name):
    """Return the svnpath for the branch named BRANCH_NAME."""

    return _path_join(self.branches_path, _clean_symbolic_name(branch_name))

  def get_tag_path(self, tag_name):
    """Return the svnpath for the tag named TAG_NAME."""

    return _path_join(self.tags_path, _clean_symbolic_name(tag_name))

  def make_trunk_path(self, path):
    """Return the trunk path for PATH.

    PATH is a filesystem path relative to cvs_root.  Return the svn
    path for this file on trunk."""

    return _path_join(self.trunk_path, path)

  def make_branch_path(self, branch_name, path):
    """Return the branch path for PATH on the branch with name BRANCH_NAME.

    PATH is a filesystem path relative to cvs_root.  Return the svn
    path for this file on the specified branch."""

    return _path_join(self.get_branch_path(branch_name), path)
1086
1087
class CVSRevision:
  """One revision of one RCS file, as recorded in a revs file line."""

  def __init__(self, ctx, *args):
    """Initialize a new CVSRevision with Ctx object CTX, and ARGS.

    If CTX is None, the following members and methods of the
    instantiated CVSRevision class object will be unavailable (or
    simply will not work correctly, if at all):
       cvs_path
       svn_path
       is_default_branch_revision()

    (Note that this class treats CTX as const, because the caller
    likely passed in a Borg instance of a Ctx.  The reason this class
    takes CTX as a parameter, instead of just instantiating a Ctx
    itself, is that this class should be usable outside cvs2svn.)

    If there is one argument in ARGS, it is a string, in the format of
    a line from a revs file.  Do *not* include a trailing newline.

    If there are multiple ARGS, there must be 17 of them,
    comprising a parsed revs line:
       timestamp       -->  (int) date stamp for this cvs revision
       digest          -->  (string) digest of author+logmsg
       prev_timestamp  -->  (int) date stamp for the previous cvs revision
       next_timestamp  -->  (int) date stamp for the next cvs revision
       op              -->  (char) OP_ADD, OP_CHANGE, or OP_DELETE
       prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
       rev             -->  (string) this CVS rev, e.g., "1.3"
       next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
       file_in_attic   -->  (char or None) true if RCS file is in Attic
       file_executable -->  (char or None) true if RCS file has exec bit set.
       file_size       -->  (int) size of the RCS file
       deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
       fname           -->  (string) relative path of file in CVS repos
       mode            -->  (string or None) "kkv", "kb", etc.
       branch_name     -->  (string or None) branch on which this rev occurred
       tags            -->  (list of strings) all tags on this revision
       branches        -->  (list of strings) all branches rooted in this rev

    The two forms of initialization are equivalent.

    Raise TypeError if ARGS has neither 1 nor 17 items.

    WARNING: Due to the resync process in pass2, prev_timestamp or
    next_timestamp may be incorrect in the c-revs or s-revs files."""

    self._ctx = ctx
    if len(args) == 17:
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.fname,
       self.mode, self.branch_name, self.tags, self.branches) = args
    elif len(args) == 1:
      # Only the first 15 fields are split off here; what remains holds
      # the tags, the branch count, the branches and the file name
      # (which may itself contain spaces).
      data = args[0].split(' ', 15)
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.mode, self.branch_name, numtags, remainder) = data
      # Patch up data items which are not simple strings
      # The timestamp is written in hexadecimal (see __str__).
      self.timestamp = int(self.timestamp, 16)
      if self.prev_timestamp == "*":
        self.prev_timestamp = 0
      else:
        self.prev_timestamp = int(self.prev_timestamp)
      if self.next_timestamp == "*":
        self.next_timestamp = 0
      else:
        self.next_timestamp = int(self.next_timestamp)
      # "*" is the placeholder __str__ writes for a missing value.
      for attr in ('prev_rev', 'next_rev', 'file_in_attic',
                   'file_executable', 'mode', 'branch_name'):
        if getattr(self, attr) == "*":
          setattr(self, attr, None)
      self.file_size = int(self.file_size)
      numtags = int(numtags)
      tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
      self.tags = tags_and_numbranches_and_remainder[:-2]
      numbranches = int(tags_and_numbranches_and_remainder[-2])
      remainder = tags_and_numbranches_and_remainder[-1]
      branches_and_fname = remainder.split(' ', numbranches)
      self.branches = branches_and_fname[:-1]
      self.fname = branches_and_fname[-1]
    else:
      raise TypeError('CVSRevision() takes 2 or 18 arguments (%d given)'
                      % (len(args) + 1))
    if ctx is not None:
      self.cvs_path = ctx.project.relative_name(self.fname)
      if self.branch_name:
        self.svn_path = ctx.project.make_branch_path(
            self.branch_name, self.cvs_path)
      else:
        self.svn_path = ctx.project.make_trunk_path(self.cvs_path)

  # The 'primary key' of a CVS Revision is the revision number + the
  # filename.  To provide a unique key (say, for a dict), we just glom
  # them together in a string.  By passing in self.prev_rev or
  # self.next_rev, you can get the unique key for their respective
  # CVSRevisions.
  def unique_key(self, revnum="0"):
    """Return the unique key for revision REVNUM of this file.

    With the default REVNUM ("0"), return the key of this revision.
    If REVNUM is None (e.g. a missing prev_rev or next_rev), return
    None."""
    if revnum is None:
      return None
    # "0" is only a marker for "use self.rev"; compare it by value,
    # since the identity of equal strings is an implementation detail.
    if revnum == "0":
      revnum = self.rev
    return revnum + "/" + self.fname

  def __str__(self):
    """Return this revision as a revs file line (without a newline),
    in the format accepted by the one-argument form of __init__."""
    return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
            % (self.timestamp, self.digest, self.prev_timestamp or "*",
              self.next_timestamp or "*", self.op, (self.prev_rev or "*"),
              self.rev, (self.next_rev or "*"), (self.file_in_attic or "*"),
              (self.file_executable or "*"),
              self.file_size,
              self.deltatext_code, (self.mode or "*"),
              (self.branch_name or "*"),
              len(self.tags), self.tags and " " or "", " ".join(self.tags),
              len(self.branches), self.branches and " " or "",
              " ".join(self.branches),
              self.fname, ))

  # Returns true if this CVSRevision is the opening CVSRevision for
  # NAME (for this RCS file).
  def opens_symbolic_name(self, name):
    """Return 1 if this revision opens the symbolic name NAME, else 0."""
    if name in self.tags:
      return 1
    if name in self.branches:
      # If this c_rev opens a branch and our op is OP_DELETE, then
      # that means that the file that this c_rev belongs to was
      # created on the branch, so for all intents and purposes, this
      # c_rev is *technically* not an opening.  See Issue #62 for more
      # information.
      if self.op != OP_DELETE:
        return 1
    return 0

  def is_default_branch_revision(self):
    """Return 1 if SELF.rev of SELF.cvs_path is a default branch
    revision according to DEFAULT_BRANCHES_DB (see the conditions
    documented there), else return None."""
    val = self._ctx._default_branches_db.get(self.cvs_path, None)
    if val is not None:
      val_last_dot = val.rindex(".")
      our_last_dot = self.rev.rindex(".")
      default_branch = val[:val_last_dot]
      our_branch = self.rev[:our_last_dot]
      default_rev_component = int(val[val_last_dot + 1:])
      our_rev_component = int(self.rev[our_last_dot + 1:])
      if (default_branch == our_branch
          and our_rev_component <= default_rev_component):
        return 1
    # else
    return None

  def rcs_path(self):
    """Returns the actual filesystem path to the RCS file of this
    CVSRevision."""
    if self.file_in_attic is None:
      return self.fname
    else:
      basepath, filename = os.path.split(self.fname)
      return os.path.join(basepath, 'Attic', filename)

  def filename(self):
    "Return the last path component of self.fname, minus the ',v'"
    return os.path.split(self.fname)[-1][:-2]
1258
class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""
  def __init__(self):
    # A hash that maps tag names to commit counts
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # the branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    self.tags[name] = self.tags.get(name, 0) + 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    return self.branches.setdefault(name, [ 0, 0, { } ])

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Returns a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags.keys():
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches.keys():
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """Return a hash of the blockers of BRANCH that are not in the
    hash EXCLUDES.  If BRANCH itself is not in EXCLUDES, the result is
    empty.  The returned hash is used as a set so the values are not
    used."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so
    the values are not used."""
    blocked_branches = { }
    for branch in self.branches.keys():
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches.keys():
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files.

    The tags file has one 'name count' line per tag; the branches
    file has one 'name create_count commit_count [blocker ...]' line
    per branch (see write())."""
    f = open(temp(TAGS_LIST))
    try:
      for line in f.readlines():
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST))
    try:
      for line in f.readlines():
        words = line.split()
        blockers = { }
        for blocker in words[3:]:
          blockers[blocker] = None
        self.branches[words[0]] = [ int(words[1]), int(words[2]), blockers ]
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files, registering both files for
    cleanup after pass 2."""
    f = open(temp(TAGS_LIST), "w")
    Cleanup().register(temp(TAGS_LIST), pass2)
    # Close explicitly so that the data is flushed to disk by the time
    # this method returns, whatever the garbage collector does.
    try:
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    Cleanup().register(temp(BRANCHES_LIST), pass2)
    try:
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()
1385
1386 class CollectData(cvs2svn_rcsparse.Sink):
1387   def __init__(self):
1388     self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
1389     Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
1390     self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
1391     Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
1392     self.default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
1393                                          DB_OPEN_NEW)
1394     Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
1395     self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
1396     Cleanup().register(temp(METADATA_DB), pass8)
1397     self.fatal_errors = []
1398     self.num_files = 0
1399     self.symbol_db = SymbolDatabase()
1400
1401     # 1 if we've collected data for at least one file, None otherwise.
1402     self.found_valid_file = None
1403
1404     # See set_fname() for initializations of other variables.
1405
1406   def set_fname(self, canonical_name, filename):
1407     """Prepare to receive data for FILENAME.  FILENAME is the absolute
1408     filesystem path to the file in question, and CANONICAL_NAME is
1409     FILENAME with the 'Attic' component removed (if the file is indeed
1410     in the Attic) ."""
1411     self.fname = canonical_name
1412
1413     # We calculate and save some file metadata here, where we can do
1414     # it only once per file, instead of waiting until later where we
1415     # would have to do the same calculations once per CVS *revision*.
1416
1417     self.rel_name = Ctx().project.relative_name(self.fname)
1418
1419     # If the paths are not the same, then that means that the
1420     # canonical_name has had the 'Attic' component stripped out.
1421     self.file_in_attic = None
1422     if canonical_name != filename:
1423       self.file_in_attic = 1
1424
1425     file_stat = os.stat(filename)
1426     # The size of our file in bytes
1427     self.file_size = file_stat[stat.ST_SIZE]
1428
1429     # Whether or not the executable bit is set.
1430     self.file_executable = None
1431     if file_stat[0] & stat.S_IXUSR:
1432       self.file_executable = 1
1433
1434     # revision -> [timestamp, author, old-timestamp]
1435     self.rev_data = { }
1436
1437     # Maps revision number (key) to the revision number of the
1438     # previous revision along this line of development.
1439     #
1440     # For the first revision R on a branch, we consider the revision
1441     # from which R sprouted to be the 'previous'.
1442     #
1443     # Note that this revision can't be determined arithmetically (due
1444     # to cvsadmin -o, which is why this is necessary).
1445     #
1446     # If the key has no previous revision, then store None as key's
1447     # value.
1448     self.prev_rev = { }
1449
1450     # This dict is essentially self.prev_rev with the values mapped in
1451     # the other direction, so following key -> value will yield you
1452     # the next revision number.
1453     #
1454     # Unlike self.prev_rev, if the key has no next revision, then the
1455     # key is not present.
1456     self.next_rev = { }
1457
1458     # Track the state of each revision so that in set_revision_info,
1459     # we can determine if our op is an add/change/delete.  We can do
1460     # this because in set_revision_info, we'll have all of the
1461     # revisions for a file at our fingertips, and we need to examine
1462     # the state of our prev_rev to determine if we're an add or a
1463     # change--without the state of the prev_rev, we are unable to
1464     # distinguish between an add and a change.
1465     self.rev_state = { }
1466
1467     # Hash mapping branch numbers, like '1.7.2', to branch names,
1468     # like 'Release_1_0_dev'.
1469     self.branch_names = { }
1470
1471     # RCS flags (used for keyword expansion).
1472     self.mode = None
1473
1474     # Hash mapping revision numbers, like '1.7', to lists of names
1475     # indicating which branches sprout from that revision, like
1476     # ['Release_1_0_dev', 'experimental_driver', ...].
1477     self.branchlist = { }
1478
1479     # Like self.branchlist, but the values are lists of tag names that
1480     # apply to the key revision.
1481     self.taglist = { }
1482
1483     # If set, this is an RCS branch number -- rcsparse calls this the
1484     # "principal branch", but CVS and RCS refer to it as the "default
1485     # branch", so that's what we call it, even though the rcsparse API
1486     # setter method is still 'set_principal_branch'.
1487     self.default_branch = None
1488
1489     # If the RCS file doesn't have a default branch anymore, but does
1490     # have vendor revisions, then we make an educated guess that those
1491     # revisions *were* the head of the default branch up until the
1492     # commit of 1.2, at which point the file's default branch became
1493     # trunk.  This records the date at which 1.2 was committed.
1494     self.first_non_vendor_revision_date = None
1495
1496     # A list of all symbols defined for the current file.  Used to
1497     # prevent multiple definitions of a symbol, something which can
1498     # easily happen when --symbol-transform is used.
1499     self.defined_symbols = { }
1500
  def set_principal_branch(self, branch):
    """Record BRANCH as the default branch of the current file.

    rcsparse calls this the "principal branch"; CVS and RCS call it
    the "default branch", hence the name of the attribute."""
    self.default_branch = branch
1503
  def set_expansion(self, mode):
    """Record MODE, the RCS flags (used for keyword expansion) of the
    current file."""
    self.mode = mode
1506
1507   def set_branch_name(self, branch_number, name):
1508     """Record that BRANCH_NUMBER is the branch number for branch NAME,
1509     and that NAME sprouts from BRANCH_NUMBER .
1510     BRANCH_NUMBER is an RCS branch number with an odd number of components,
1511     for example '1.7.2' (never '1.7.0.2')."""
1512     if not self.branch_names.has_key(branch_number):
1513       self.branch_names[branch_number] = name
1514       # The branchlist is keyed on the revision number from which the
1515       # branch sprouts, so strip off the odd final component.
1516       sprout_rev = branch_number[:branch_number.rfind(".")]
1517       self.branchlist.setdefault(sprout_rev, []).append(name)
1518       self.symbol_db.register_branch_creation(name)
1519     else:
1520       sys.stderr.write("%s: in '%s':\n"
1521                        "   branch '%s' already has name '%s',\n"
1522                        "   cannot also have name '%s', ignoring the latter\n"
1523                        % (warning_prefix, self.fname, branch_number,
1524                           self.branch_names[branch_number], name))
1525
1526   def rev_to_branch_name(self, revision):
1527     """Return the name of the branch on which REVISION lies.
1528     REVISION is a non-branch revision number with an even number of,
1529     components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
1530     For the convenience of callers, REVISION can also be a trunk
1531     revision such as '1.2', in which case just return None."""
1532     if trunk_rev.match(revision):
1533       return None
1534     return self.branch_names.get(revision[:revision.rindex(".")])
1535
1536   def add_cvs_branch(self, revision, branch_name):
1537     """Record the root revision and branch revision for BRANCH_NAME,
1538     based on REVISION.  REVISION is a CVS branch number having an even
1539     number of components where the second-to-last is '0'.  For
1540     example, if it's '1.7.0.2', then record that BRANCH_NAME sprouts
1541     from 1.7 and has branch number 1.7.2."""
1542     last_dot = revision.rfind(".")
1543     branch_rev = revision[:last_dot]
1544     last2_dot = branch_rev.rfind(".")
1545     branch_rev = branch_rev[:last2_dot] + revision[last_dot:]
1546     self.set_branch_name(branch_rev, branch_name)
1547
1548   def define_tag(self, name, revision):
1549     """Record a bidirectional mapping between symbolic NAME and REVISION.
1550     REVISION is an unprocessed revision number from the RCS file's
1551     header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
1552     This function will determine what kind of symbolic name it is by
1553     inspection, and record it in the right places."""
1554     for (pattern, replacement) in Ctx().symbol_transforms:
1555       newname = pattern.sub(replacement, name)
1556       if newname != name:
1557         Log().write(LOG_WARN, "   symbol '%s' transformed to '%s'"
1558                     % (name, newname))
1559         name = newname
1560     if self.defined_symbols.has_key(name):
1561       err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
1562                 % (error_prefix, name, self.fname)
1563       sys.stderr.write(err + "\n")
1564       self.fatal_errors.append(err)
1565     self.defined_symbols[name] = None
1566     if branch_tag.match(revision):
1567       self.add_cvs_branch(revision, name)
1568     elif vendor_tag.match(revision):
1569       self.set_branch_name(revision, name)
1570     else:
1571       self.taglist.setdefault(revision, []).append(name)
1572       self.symbol_db.register_tag_creation(name)
1573
1574   def define_revision(self, revision, timestamp, author, state,
1575                       branches, next):
1576
1577     # Record the state of our revision for later calculations
1578     self.rev_state[revision] = state
1579
1580     # store the rev_data as a list in case we have to jigger the timestamp
1581     self.rev_data[revision] = [int(timestamp), author, None]
1582
1583     # When on trunk, the RCS 'next' revision number points to what
1584     # humans might consider to be the 'previous' revision number.  For
1585     # example, 1.3's RCS 'next' is 1.2.
1586     #
1587     # However, on a branch, the RCS 'next' revision number really does
1588     # point to what humans would consider to be the 'next' revision
1589     # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
1590     #
1591     # In other words, in RCS, 'next' always means "where to find the next
1592     # deltatext that you need this revision to retrieve.
1593     #
1594     # That said, we don't *want* RCS's behavior here, so we determine
1595     # whether we're on trunk or a branch and set self.prev_rev
1596     # accordingly.
1597     #
1598     # One last thing.  Note that if REVISION is a branch revision,
1599     # instead of mapping REVISION to NEXT, we instead map NEXT to
1600     # REVISION.  Since we loop over all revisions in the file before
1601     # doing anything with the data we gather here, this 'reverse
1602     # assignment' effectively does the following:
1603     #
1604     # 1. Gives us no 'prev' value for REVISION (in this
1605     # iteration... it may have been set in a previous iteration)
1606     #
1607     # 2. Sets the 'prev' value for the revision with number NEXT to
1608     # REVISION.  So when we come around to the branch revision whose
1609     # revision value is NEXT, its 'prev' and 'prev_rev' are already
1610     # set.
1611     if trunk_rev.match(revision):
1612       self.prev_rev[revision] = next
1613       self.next_rev[next] = revision
1614     elif next:
1615       self.prev_rev[next] = revision
1616       self.next_rev[revision] = next
1617
1618     for b in branches:
1619       self.prev_rev[b] = revision
1620
1621     # Ratchet up the highest vendor head revision, if necessary.
1622     if self.default_branch:
1623       default_branch_root = self.default_branch + "."
1624       if ((revision.find(default_branch_root) == 0)
1625           and (default_branch_root.count('.') == revision.count('.'))):
1626         # This revision is on the default branch, so record that it is
1627         # the new highest default branch head revision.
1628         self.default_branches_db[self.rel_name] = revision
1629     else:
1630       # No default branch, so make an educated guess.
1631       if revision == '1.2':
1632         # This is probably the time when the file stopped having a
1633         # default branch, so make a note of it.
1634         self.first_non_vendor_revision_date = timestamp
1635       else:
1636         m = vendor_revision.match(revision)
1637         if m and ((not self.first_non_vendor_revision_date)
1638                   or (timestamp < self.first_non_vendor_revision_date)):
1639           # We're looking at a vendor revision, and it wasn't
1640           # committed after this file lost its default branch, so bump
1641           # the maximum trunk vendor revision in the permanent record.
1642           self.default_branches_db[self.rel_name] = revision
1643
1644     if not trunk_rev.match(revision):
1645       # Check for unlabeled branches, record them.  We tried to collect
1646       # all branch names when we parsed the symbolic name header
1647       # earlier, of course, but that didn't catch unlabeled branches.
1648       # If a branch is unlabeled, this is our first encounter with it,
1649       # so we have to record its data now.
1650       branch_number = revision[:revision.rindex(".")]
1651       if not self.branch_names.has_key(branch_number):
1652         branch_name = "unlabeled-" + branch_number
1653         self.set_branch_name(branch_number, branch_name)
1654
1655       # Register the commit on this non-trunk branch
1656       branch_name = self.branch_names[branch_number]
1657       self.symbol_db.register_branch_commit(branch_name)
1658
1659   def tree_completed(self):
1660     "The revision tree has been parsed.  Analyze it for consistency."
1661
1662     # Our algorithm depends upon the timestamps on the revisions occuring
1663     # monotonically over time.  That is, we want to see rev 1.34 occur in
1664     # time before rev 1.35.  If we inserted 1.35 *first* (due to the time-
1665     # sorting), and then tried to insert 1.34, we'd be screwed.
1666
1667     # to perform the analysis, we'll simply visit all of the 'previous'
1668     # links that we have recorded and validate that the timestamp on the
1669     # previous revision is before the specified revision
1670
1671     # if we have to resync some nodes, then we restart the scan. just keep
1672     # looping as long as we need to restart.
1673     while 1:
1674       for current, prev in self.prev_rev.items():
1675         if not prev:
1676           # no previous revision exists (i.e. the initial revision)
1677           continue
1678         t_c = self.rev_data[current][0]
1679         t_p = self.rev_data[prev][0]
1680         if t_p >= t_c:
1681           # the previous revision occurred later than the current revision.
1682           # shove the previous revision back in time (and any before it that
1683           # may need to shift).
1684
1685           # We sync backwards and not forwards because any given CVS
1686           # Revision has only one previous revision.  However, a CVS
1687           # Revision can *be* a previous revision for many other
1688           # revisions (e.g., a revision that is the source of multiple
1689           # branches).  This becomes relevant when we do the secondary
1690           # synchronization in pass 2--we can make certain that we
1691           # don't resync a revision earlier than it's previous
1692           # revision, but it would be non-trivial to make sure that we
1693           # don't resync revision R *after* any revisions that have R
1694           # as a previous revision.
1695           while t_p >= t_c:
1696             self.rev_data[prev][0] = t_c - 1    # new timestamp
1697             self.rev_data[prev][2] = t_p        # old timestamp
1698             delta = t_c - 1 - t_p
1699             msg =  "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
1700                   % (self.rel_name, prev, time.ctime(t_p), delta)
1701             Log().write(LOG_VERBOSE, msg)
1702             if (delta > COMMIT_THRESHOLD
1703                 or delta < (COMMIT_THRESHOLD * -1)):
1704               str = "%s: Significant timestamp change for '%s' (%d seconds)"
1705               Log().write(LOG_WARN,
1706                           str % (warning_prefix, self.rel_name, delta))
1707             current = prev
1708             prev = self.prev_rev[current]
1709             if not prev:
1710               break
1711             t_c = t_c - 1               # self.rev_data[current][0]
1712             t_p = self.rev_data[prev][0]
1713
1714           # break from the for-loop
1715           break
1716       else:
1717         # finished the for-loop (no resyncing was performed)
1718         return
1719
1720   def set_revision_info(self, revision, log, text):
1721     timestamp, author, old_ts = self.rev_data[revision]
1722     digest = sha.new(log + '\0' + author).hexdigest()
1723     if old_ts:
1724       # the timestamp on this revision was changed. log it for later
1725       # resynchronization of other files's revisions that occurred
1726       # for this time and log message.
1727       self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))
1728
1729     # "...Give back one kadam to honor the Hebrew God whose Ark this is."
1730     #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
1731     #
1732     # If revision 1.1 appears to have been created via 'cvs add'
1733     # instead of 'cvs import', then this file probably never had a
1734     # default branch, so retroactively remove its record in the
1735     # default branches db.  The test is that the log message CVS uses
1736     # for 1.1 in imports is "Initial revision\n" with no period.
1737     if revision == '1.1' and log != 'Initial revision\n':
1738       try:
1739         del self.default_branches_db[self.rel_name]
1740       except KeyError:
1741         pass
1742
1743     # Get the timestamps of the previous and next revisions
1744     prev_rev = self.prev_rev[revision]
1745     prev_timestamp, ign, ign = self.rev_data.get(prev_rev, [0, None, None])
1746
1747     next_rev = self.next_rev.get(revision)
1748     next_timestamp, ign, ign = self.rev_data.get(next_rev, [0, None, None])
1749
1750     # How to tell if a CVSRevision is an add, a change, or a deletion:
1751     #
1752     # It's a delete if RCS state is 'dead'
1753     #
1754     # It's an add if RCS state is 'Exp.' and
1755     #      - we either have no previous revision
1756     #        or
1757     #      - we have a previous revision whose state is 'dead'
1758     #
1759     # Anything else is a change.
1760     if self.rev_state[revision] == 'dead':
1761       op = OP_DELETE
1762     elif ((self.prev_rev.get(revision, None) is None)
1763           or (self.rev_state[self.prev_rev[revision]] == 'dead')):
1764       op = OP_ADD
1765     else:
1766       op = OP_CHANGE
1767
1768     def is_branch_revision(rev):
1769       """Return True if this revision is not a trunk revision,
1770       else return False."""
1771       if rev.count('.') >= 3:
1772         return True
1773       return False
1774
1775     def is_same_line_of_development(rev1, rev2):
1776       """Return True if rev1 and rev2 are on the same line of
1777       development (i.e., both on trunk, or both on the same branch);
1778       return False otherwise.  Either rev1 or rev2 can be None, in
1779       which case automatically return False."""
1780       if rev1 is None or rev2 is None:
1781         return False
1782       if rev1.count('.') == 1 and rev2.count('.') == 1:
1783         return True
1784       if rev1[0:rev1.rfind('.')] == rev2[0:rev2.rfind('.')]:
1785         return True
1786       return False
1787
1788     # There can be an odd situation where the tip revision of a branch
1789     # is alive, but every predecessor on the branch is in state 'dead',
1790     # yet the revision from which the branch sprouts is alive.  (This
1791     # is sort of a mirror image of the more common case of adding a
1792     # file on a branch, in which the first revision on the branch is
1793     # alive while the revision from which it sprouts is dead.)
1794     #
1795     # In this odd situation, we must mark the first live revision on
1796     # the branch as an OP_CHANGE instead of an OP_ADD, because it
1797     # reflects, however indirectly, a change w.r.t. the source
1798     # revision from which the branch sprouts.
1799     #
1800     # This is issue #89.
1801     cur_num = revision
1802     if is_branch_revision(revision) and self.rev_state[revision] != 'dead':
1803       while 1:
1804         prev_num = self.prev_rev.get(cur_num, None)
1805         if not cur_num or not prev_num:
1806           break
1807         if (not is_same_line_of_development(cur_num, prev_num)
1808             and self.rev_state[cur_num] == 'dead'
1809             and self.rev_state[prev_num] != 'dead'):
1810           op = OP_CHANGE
1811         cur_num = self.prev_rev.get(cur_num, None)
1812
1813     if text:
1814       deltatext_code = DELTATEXT_NONEMPTY
1815     else:
1816       deltatext_code = DELTATEXT_EMPTY
1817
1818     c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp,
1819                         next_timestamp, op,
1820                         prev_rev, revision, next_rev,
1821                         self.file_in_attic, self.file_executable,
1822                         self.file_size,
1823                         deltatext_code, self.fname,
1824                         self.mode, self.rev_to_branch_name(revision),
1825                         self.taglist.get(revision, []),
1826                         self.branchlist.get(revision, []))
1827     self.revs.write(str(c_rev) + "\n")
1828     StatsKeeper().record_c_rev(c_rev)
1829
1830     if not self.metadata_db.has_key(digest):
1831       self.metadata_db[digest] = (author, log)
1832
1833   def parse_completed(self):
1834     # Walk through all branches and tags and register them with
1835     # their parent branch in the symbol database.
1836     for revision, symbols in self.taglist.items() + self.branchlist.items():
1837       for symbol in symbols:
1838         name = self.rev_to_branch_name(revision)
1839         if name is not None:
1840           self.symbol_db.register_branch_blocker(name, symbol)
1841
1842     self.num_files = self.num_files + 1
1843
1844   def write_symbol_db(self):
1845     self.symbol_db.write()
1846
class SymbolingsLogger:
  """Maintain the file of symbol "openings" and "closings".

  The recorded data is used later to work out the range of
  SVNRevisions from which each file can be copied when a branch or tag
  is created in Subversion.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  Example: on file 'foo.c', branch BEE has branch number 1.2.2 and so
  sprouts from revision 1.2.  Hence 1.2 is the opening for BEE on path
  'foo.c' and 1.3 is its closing.  Many revisions may lie
  chronologically between 1.2 and 1.3 (revisions on branches of
  'foo.c', possibly on BEE itself), but 1.3 is the next revision *on
  the same line* as 1.2, and that is what makes it the closing for
  every symbolic name opened by 1.2.

  The point of all this is to create branches and tags with as few
  copies and deletes as possible.  Suppose revisions 1.2 and 1.3 of
  foo.c correspond to Subversion revisions 17 and 30: branch BEE is
  then best copied from somewhere in 17-30.  If another file, 'bar.c',
  has opening and closing CVSRevisions for BEE that correspond to
  revisions 24 and 39, the ideal source for the whole branch lies
  between 24 and 29, inclusive.
  """
  def __init__(self):
    # Final output: one line per opening or closing.
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
    # Scratch file of closings whose svn revnum is not known yet; it is
    # read back by close().
    self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
    Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

    # Maps each *source* cvs_path for which an 'opening' has been noted
    # to the list of (uncleaned) symbolic names that path has opened.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log the openings found in C_REV and, if C_REV.next_rev is not
    None, the matching closings.

    Openings are written with SVN_REVNUM; they are skipped when C_REV
    is a delete.  Closings are only queued in the closings file, since
    their revnum is determined later (see close())."""
    for symbol in c_rev.tags + c_rev.branches:
      self._note_default_branch_opening(c_rev, symbol)
      if c_rev.op != OP_DELETE:
        self._log(symbol, svn_revnum,
                  c_rev.cvs_path, c_rev.branch_name, OPENING)

      if c_rev.next_rev is None:
        continue
      # The next revision on this line of development closes every
      # symbol opened here.  Its svn revnum is not known yet, so just
      # remember its key.
      closing_key = c_rev.unique_key(c_rev.next_rev)
      self.closings.write('%s %s\n' % (symbol, closing_key))

  def _log(self, name, svn_revnum, cvs_path, branch_name, type):
    """Append one line to the symbol_openings_closings file stating
    that SVN_REVNUM of CVS_PATH on BRANCH_NAME is the opening or
    closing (TYPE) of the symbolic name NAME.

    A false BRANCH_NAME is written as '*'.  TYPE should only be one of
    the global constants OPENING or CLOSING."""
    # The revnum is zero-padded to 8 digits, i.e. up to 99,999,999 SVN
    # revs keep a fixed width.  That *should* be enough.
    fields = (name, svn_revnum, type, branch_name or '*', cvs_path)
    self.symbolings.write('%s %.8d %s %s %s\n' % fields)

  def close(self):
    """Replay the closings file: look up the svn_revnum of each closing
    CVSRevision and write a proper line to the symbolings file.  Both
    files are closed by this method."""
    # Needed to turn a revision key back into its CVSRevision.
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    self.closings.close()
    for entry in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      (symbol, rev_key) = entry.rstrip().split(" ", 1)
      closing_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

      closing_rev = cvs_revs_db.get_revision(rev_key)
      self._log(symbol, closing_revnum, closing_rev.cvs_path,
                closing_rev.branch_name, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """Remember that C_REV.cvs_path has opened SYMBOLIC_NAME.

    The name is appended to the list kept for that path in
    self.open_paths_with_default_branches; this method itself does not
    check whether C_REV is a default branch revision."""
    opened = self.open_paths_with_default_branches.setdefault(
        c_rev.cvs_path, [])
    opened.append(symbolic_name)

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches has an entry for
    C_REV.cvs_path, log every name in that entry as a closing with
    SVN_REVNUM as the closing revision number, then drop the entry."""
    path = c_rev.cvs_path
    if not self.open_paths_with_default_branches.has_key(path):
      return
    for symbol in self.open_paths_with_default_branches[path]:
      self._log(symbol, svn_revnum, path, None, CLOSING)
    # These openings are done with.
    del self.open_paths_with_default_branches[path]
1958
1959
class PersistenceManager:
  """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.
  In 'read' mode, PersistenceManager will open existing on-disk databases
  and the set_* methods will be unavailable (they raise RuntimeError)."""
  def __init__(self, mode):
    """Open the databases in MODE.

    Raise RuntimeError if MODE is neither DB_OPEN_NEW nor
    DB_OPEN_READ."""
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
    self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
    Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
    self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
    Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
    self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES), mode)
    Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)
    # The metadata and CVSRevision databases are always opened
    # read-only, whatever MODE is.
    self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
    self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
    ###PERF kff Elsewhere there are comments about sucking the tags db
    ### into memory.  That seems like a good idea.
    if not Ctx().trunk_only:
      self.tags_db = TagsDatabase(DB_OPEN_READ)
      self.motivating_revnums = SDatabase(temp(MOTIVATING_REVNUMS), mode)
      Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)

    # "branch_name" -> svn_revnum in which branch was last filled.
    # This is used by CVSCommit._pre_commit, to prevent creating a fill
    # revision which would have nothing to do.
    self.last_filled = {}

  def _check_writable(self):
    """Raise RuntimeError if this manager was opened with
    DB_OPEN_READ.  Shared guard for all set_* methods."""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager')

  def get_svn_revnum(self, cvs_rev_unique_key):
    """Return the Subversion revision number in which
    CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
    is no mapping for CVS_REV_UNIQUE_KEY."""
    return int(self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM))

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None.

    This method can throw SVNCommitInternalInconsistencyError.
    """
    svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
    c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
    if c_rev_keys is None:
      return None

    digest = None
    for key in c_rev_keys:
      c_rev = self.cvs_revisions.get_revision(key)
      svn_commit.add_revision(c_rev)
      # Set the author and log message for this commit by using
      # CVSRevision metadata, but only if haven't done so already.
      if digest is None:
        digest = c_rev.digest
        author, log_msg = self.svn_commit_metadata[digest]
        svn_commit.set_author(author)
        svn_commit.set_log_msg(log_msg)

    # If we're doing a trunk-only conversion, we don't need to do any more
    # work.
    if Ctx().trunk_only:
      return svn_commit

    name, date = self._get_name_and_date(svn_revnum)
    if name:
      svn_commit.set_symbolic_name(name)
      svn_commit.set_date(date)
      if self.tags_db.has_key(name):
        svn_commit.is_tag = 1

    motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
    if motivating_revnum:
      svn_commit.set_motivating_revnum(int(motivating_revnum))
      svn_commit.set_date(date)

    # A commit either carries CVS revisions or fills a symbolic name,
    # never both.
    if len(svn_commit.cvs_revs) and name:
      raise SVNCommit.SVNCommitInternalInconsistencyError(
          "An SVNCommit cannot have cvs_revisions *and* a corresponding\n"
          "symbolic name ('%s') to fill."
          % (_clean_symbolic_name(name),))

    return svn_commit

  def set_cvs_revs(self, svn_revnum, cvs_revs):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS.

    Raise RuntimeError if the manager is read-only."""
    self._check_writable()
    for c_rev in cvs_revs:
      Log().write(LOG_VERBOSE, " ", c_rev.unique_key())
    self.svn2cvs_db[str(svn_revnum)] = [x.unique_key() for x in cvs_revs]
    for c_rev in cvs_revs:
      self.cvs2svn_db[c_rev.unique_key()] = svn_revnum

  def set_name_and_date(self, svn_revnum, name, date):
    """Associate symbolic name NAME and DATE with SVN_REVNUM, and note
    SVN_REVNUM in self.last_filled under NAME.

    NAME is allowed to be None.

    Raise RuntimeError if the manager is read-only."""
    self._check_writable()
    self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
    self.last_filled[name] = svn_revnum

  def _get_name_and_date(self, svn_revnum):
    """Return a tuple containing the symbolic name and date associated
    with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
    associated with it."""
    return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))

  def set_motivating_revnum(self, svn_revnum, motivating_revnum):
    """Store MOTIVATING_REVNUM as the value of SVN_REVNUM.  Both are
    stored as strings.

    Raise RuntimeError if the manager is read-only."""
    self._check_writable()
    self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
2088
2089
2090 class CVSCommit:
2091   """Each instance of this class contains a number of CVS Revisions
2092   that correspond to one or more Subversion Commits.  After all CVS
2093   Revisions are added to the grouping, calling process_revisions will
2094   generate a Subversion Commit (or Commits) for the set of CVS
2095   Revisions in the grouping."""
2096
2097   def __init__(self, digest, author, log):
2098     self.digest = digest
2099     self.author = author
2100     self.log = log
2101
2102     # Symbolic names for which the last source revision has already
2103     # been seen and for which the CVSRevisionAggregator has already
2104     # generated a fill SVNCommit.  See self.process_revisions().
2105     self.done_symbols = [ ]
2106
2107     self.files = { }
2108     # Lists of CVSRevisions
2109     self.changes = [ ]
2110     self.deletes = [ ]
2111
2112     # Start out with a t_min higher than any incoming time T, and a
2113     # t_max lower than any incoming T.  This way the first T will
2114     # push t_min down to T, and t_max up to T, naturally (without any
2115     # special-casing), and successive times will then ratchet them
2116     # outward as appropriate.
2117     self.t_min = 1L<<32
2118     self.t_max = 0
2119
2120     # This will be set to the SVNCommit that occurs in self._commit.
2121     self.motivating_commit = None
2122
2123     # This is a list of all non-primary commits motivated by the main
2124     # commit.  We gather these so that we can set their dates to the
2125     # same date as the primary commit.
2126     self.secondary_commits = [ ]
2127
2128     # State for handling default branches.
2129     #
2130     # Here is a tempting, but ultimately nugatory, bit of logic, which
2131     # I share with you so you may appreciate the less attractive, but
2132     # refreshingly non-nugatory, logic which follows it:
2133     #
2134     # If some of the commits in this txn happened on a non-trunk
2135     # default branch, then those files will have to be copied into
2136     # trunk manually after being changed on the branch (because the
2137     # RCS "default branch" appears as head, i.e., trunk, in practice).
2138     # As long as those copies don't overwrite any trunk paths that
2139     # were also changed in this commit, then we can do the copies in
2140     # the same revision, because they won't cover changes that don't
2141     # appear anywhere/anywhen else.  However, if some of the trunk dst
2142     # paths *did* change in this commit, then immediately copying the
2143     # branch changes would lose those trunk mods forever.  So in this
2144     # case, we need to do at least that copy in its own revision.  And
2145     # for simplicity's sake, if we're creating the new revision for
2146     # even one file, then we just do all such copies together in the
2147     # new revision.
2148     #
2149     # Doesn't that sound nice?
2150     #
2151     # Unfortunately, Subversion doesn't support copies with sources
2152     # in the current txn.  All copies must be based in committed
2153     # revisions.  Therefore, we generate the above-described new
2154     # revision unconditionally.
2155     #
2156     # This is a list of c_revs, and a c_rev is appended for each
2157     # default branch commit that will need to be copied to trunk (or
2158     # deleted from trunk) in some generated revision following the
2159     # "regular" revision.
2160     self.default_branch_cvs_revisions = [ ]
2161
2162   def __cmp__(self, other):
2163     # Commits should be sorted by t_max.  If both self and other have
2164     # the same t_max, break the tie using t_min, and lastly, digest
2165     return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
2166             or cmp(self.digest, other.digest))
2167
2168   def has_file(self, fname):
2169     return self.files.has_key(fname)
2170
2171   def revisions(self):
2172     return self.changes + self.deletes
2173
2174   def opens_symbolic_name(self, name):
2175     """Returns true if any CVSRevision in this commit is on a tag or a
2176     branch or is the origin of a tag or branch."""
2177     for c_rev in self.revisions():
2178       if c_rev.opens_symbolic_name(name):
2179         return 1
2180     return 0
2181
2182   def add_revision(self, c_rev):
2183     # Record the time range of this commit.
2184     #
2185     # ### ISSUE: It's possible, though unlikely, that the time range
2186     # of a commit could get gradually expanded to be arbitrarily
2187     # longer than COMMIT_THRESHOLD.  I'm not sure this is a huge
2188     # problem, and anyway deciding where to break it up would be a
2189     # judgement call.  For now, we just print a warning in commit() if
2190     # this happens.
2191     if c_rev.timestamp < self.t_min:
2192       self.t_min = c_rev.timestamp
2193     if c_rev.timestamp > self.t_max:
2194       self.t_max = c_rev.timestamp
2195
2196     if c_rev.op == OP_DELETE:
2197       self.deletes.append(c_rev)
2198     else:
2199       # OP_CHANGE or OP_ADD
2200       self.changes.append(c_rev)
2201
2202     self.files[c_rev.fname] = 1
2203
2204   def _pre_commit(self):
2205     """Generates any SVNCommits that must exist before the main
2206     commit."""
2207
2208     # There may be multiple c_revs in this commit that would cause
2209     # branch B to be filled, but we only want to fill B once.  On the
2210     # other hand, there might be multiple branches committed on in
2211     # this commit.  Whatever the case, we should count exactly one
2212     # commit per branch, because we only fill a branch once per
2213     # CVSCommit.  This list tracks which branches we've already
2214     # counted.
2215     accounted_for_sym_names = [ ]
2216
2217     def fill_needed(c_rev, pm):
2218       """Return 1 if this is the first commit on a new branch (for
2219       this file) and we need to fill the branch; else return 0
2220       (meaning that some other file's first commit on the branch has
2221       already done the fill for us).
2222
2223       If C_REV.op is OP_ADD, only return 1 if the branch that this
2224       commit is on has no last filled revision.
2225
2226       PM is a PersistenceManager to query.
2227       """
2228
2229       # Different '.' counts indicate that c_rev is now on a different
2230       # line of development (and may need a fill)
2231       if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
2232         svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
2233         # It should be the case that when we have a file F that
2234         # is added on branch B (thus, F on trunk is in state
2235         # 'dead'), we generate an SVNCommit to fill B iff the branch
2236         # has never been filled before.
2237         #
2238         # If this c_rev.op == OP_ADD, *and* the branch has never
2239         # been filled before, then fill it now.  Otherwise, no need to
2240         # fill it.
2241         if c_rev.op == OP_ADD:
2242           if pm.last_filled.get(c_rev.branch_name, None) is None:
2243             return 1
2244         elif c_rev.op == OP_CHANGE:
2245           if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
2246             return 1
2247         elif c_rev.op == OP_DELETE:
2248           if pm.last_filled.get(c_rev.branch_name, None) is None:
2249             return 1
2250       return 0
2251
2252     for c_rev in self.changes + self.deletes:
2253       # If a commit is on a branch, we must ensure that the branch
2254       # path being committed exists (in HEAD of the Subversion
2255       # repository).  If it doesn't exist, we will need to fill the
2256       # branch.  After the fill, the path on which we're committing
2257       # will exist.
2258       if c_rev.branch_name \
2259           and c_rev.branch_name not in accounted_for_sym_names \
2260           and c_rev.branch_name not in self.done_symbols \
2261           and fill_needed(c_rev, Ctx()._persistence_manager):
2262         svn_commit = SVNCommit("pre-commit symbolic name '%s'"
2263                                % c_rev.branch_name)
2264         svn_commit.set_symbolic_name(c_rev.branch_name)
2265         self.secondary_commits.append(svn_commit)
2266         accounted_for_sym_names.append(c_rev.branch_name)
2267
2268   def _commit(self):
2269     """Generates the primary SVNCommit that corresponds to this
2270     CVSCommit."""
2271     # Generate an SVNCommit unconditionally.  Even if the only change
2272     # in this CVSCommit is a deletion of an already-deleted file (that
2273     # is, a CVS revision in state 'dead' whose predecessor was also in
2274     # state 'dead'), the conversion will still generate a Subversion
2275     # revision containing the log message for the second dead
2276     # revision, because we don't want to lose that information.
2277     svn_commit = SVNCommit("commit")
2278     self.motivating_commit = svn_commit
2279
2280     for c_rev in self.changes:
2281       svn_commit.add_revision(c_rev)
2282       # Only make a change if we need to.  When 1.1.1.1 has an empty
2283       # deltatext, the explanation is almost always that we're looking
2284       # at an imported file whose 1.1 and 1.1.1.1 are identical.  On
2285       # such imports, CVS creates an RCS file where 1.1 has the
2286       # content, and 1.1.1.1 has an empty deltatext, i.e, the same
2287       # content as 1.1.  There's no reason to reflect this non-change
2288       # in the repository, so we want to do nothing in this case.  (If
2289       # we were really paranoid, we could make sure 1.1's log message
2290       # is the CVS-generated "Initial revision\n", but I think the
2291       # conditions below are strict enough.)
2292       if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
2293               and (c_rev.rev == "1.1.1.1")):
2294         if c_rev.is_default_branch_revision():
2295           self.default_branch_cvs_revisions.append(c_rev)
2296
2297     for c_rev in self.deletes:
2298       # When a file is added on a branch, CVS not only adds the file
2299       # on the branch, but generates a trunk revision (typically
2300       # 1.1) for that file in state 'dead'.  We only want to add
2301       # this revision if the log message is not the standard cvs
2302       # fabricated log message.
2303       if c_rev.prev_rev is None:
2304         # c_rev.branches may be empty if the originating branch
2305         # has been excluded.
2306         if not c_rev.branches:
2307           continue
2308         cvs_generated_msg = ('file %s was initially added on branch %s.\n'
2309                              % (c_rev.filename(),
2310                                 c_rev.branches[0]))
2311         author, log_msg = \
2312             Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
2313         if log_msg == cvs_generated_msg:
2314           continue
2315
2316       svn_commit.add_revision(c_rev)
2317       if c_rev.is_default_branch_revision():
2318         self.default_branch_cvs_revisions.append(c_rev)
2319
2320     # There is a slight chance that we didn't actually register any
2321     # CVSRevisions with our SVNCommit (see loop over self.deletes
2322     # above), so if we have no CVSRevisions, we don't flush the
2323     # svn_commit to disk and roll back our revnum.
2324     if len(svn_commit.cvs_revs) > 0:
2325       svn_commit.flush()
2326     else:
2327       # We will not be flushing this SVNCommit, so rollback the
2328       # SVNCommit revision counter.
2329       SVNCommit.revnum = SVNCommit.revnum - 1
2330
2331     if not Ctx().trunk_only:
2332       for c_rev in self.revisions():
2333         Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
2334
2335   def _post_commit(self):
2336     """Generates any SVNCommits that we can perform now that _commit
2337     has happened.  That is, handle non-trunk default branches.
2338     Sometimes an RCS file has a non-trunk default branch, so a commit
2339     on that default branch would be visible in a default CVS checkout
2340     of HEAD.  If we don't copy that commit over to Subversion's trunk,
2341     then there will be no Subversion tree which corresponds to that
2342     CVS checkout.  Of course, in order to copy the path over, we may
2343     first need to delete the existing trunk there.  """
2344
2345     # Only generate a commit if we have default branch revs
2346     if len(self.default_branch_cvs_revisions):
2347       # Generate an SVNCommit for all of our default branch c_revs.
2348       svn_commit = SVNCommit("post-commit default branch(es)")
2349       svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
2350       for c_rev in self.default_branch_cvs_revisions:
2351         svn_commit.add_revision(c_rev)
2352         Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
2353                                                             svn_commit.revnum)
2354       self.secondary_commits.append(svn_commit)
2355
2356   def process_revisions(self, done_symbols):
2357     """Process all the CVSRevisions that this instance has, creating
2358     one or more SVNCommits in the process.  Generate fill SVNCommits
2359     only for symbols not in DONE_SYMBOLS (avoids unnecessary
2360     fills).
2361
2362     Return the primary SVNCommit that corresponds to this CVSCommit.
2363     The returned SVNCommit is the commit that motivated any other
2364     SVNCommits generated in this CVSCommit."""
2365     self.done_symbols = done_symbols
2366     seconds = self.t_max - self.t_min + 1
2367
2368     Log().write(LOG_VERBOSE, '-' * 60)
2369     Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
2370     if seconds == 1:
2371       Log().write(LOG_VERBOSE, '  Start time: %s (duration: 1 second)'
2372                   % time.ctime(self.t_max))
2373     else:
2374       Log().write(LOG_VERBOSE, '  Start time: %s' % time.ctime(self.t_min))
2375       Log().write(LOG_VERBOSE, '  End time:   %s (duration: %d seconds)'
2376                   % (time.ctime(self.t_max), seconds))
2377
2378     if seconds > COMMIT_THRESHOLD + 1:
2379       Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
2380                   % (warning_prefix, COMMIT_THRESHOLD))
2381
2382     if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
2383       self._commit()
2384       return self.motivating_commit
2385
2386     self._pre_commit()
2387     self._commit()
2388     self._post_commit()
2389
2390     for svn_commit in self.secondary_commits:
2391       svn_commit.set_date(self.motivating_commit.get_date())
2392       svn_commit.flush()
2393
2394     return self.motivating_commit
2395
2396
class SVNCommit:
  """A single commit to the Subversion repository.

  An SVNCommit is one of three kinds:

  1. A primary commit, which commits one or more CVSRevisions (and
     cannot fill a symbolic name).

  2. A symbolic name commit, which creates or fills a symbolic name
     (and cannot commit CVSRevisions).

  3. A default branch commit, which updates trunk to reflect the
     contents of a particular branch (this is how RCS default
     branches are handled)."""

  # The revision number that the next new SVNCommit will be given.
  # Numbering starts at 2 because SVNRepositoryMirror uses the first
  # commit to create trunk, tags, and branches.
  revnum = 2

  class SVNCommitInternalInconsistencyError(Exception):
    """Raised when an impossible state is encountered in the SVNCommit
    databases."""

  def __init__(self, description="", revnum=None, cvs_revs=None):
    """Create an SVNCommit.  DESCRIPTION is used for debugging only.

    If REVNUM is given, this SVNCommit corresponds to that revision
    number; otherwise it takes the next number from SVNCommit.revnum,
    which is then incremented.  If CVS_REVS is given, it must be the
    exact set of CVSRevisions for REVNUM.

    Passing CVS_REVS without REVNUM is an error.  Passing REVNUM
    without CVS_REVS is fine; the revisions can then be added one at a
    time with add_revision()."""
    self._description = description

    # Revprop metadata for this commit.
    #
    # The values assigned here are mere placeholders; at least the
    # log message and the date should have been replaced by the time
    # they are used.
    #
    # The attributes are private because they must be handed out
    # encoded in UTF8, although callers need not set them in UTF8.
    # They are therefore set through accessor methods and read, in
    # dictionary form, through self.get_revprops().
    self._author = Ctx().username
    self._log_msg = "This log message means an SVNCommit was used too soon."
    self._max_date = 0  # The latest date seen so far.

    if not cvs_revs:
      cvs_revs = []
    self.cvs_revs = cvs_revs

    if not revnum:
      # No number was requested, so take the next free one.
      revnum = SVNCommit.revnum
      SVNCommit.revnum = revnum + 1
    self.revnum = revnum

    # The (uncleaned) symbolic name filled by this SVNCommit, if any.
    self.symbolic_name = None

    # For a default branch synchronization commit, this is the
    # Subversion revision number of the *primary* commit in which the
    # default branch changes actually happened.  For every other kind
    # of commit it is None.
    #
    # Several synchronization commits may refer to the same
    # motivating revision number, and one synchronization commit may
    # contain CVSRevisions on several different default branches.
    self.motivating_revnum = None

    # True only if this commit fills a symbolic name that is a tag;
    # None in every other case.
    self.is_tag = None

  def set_symbolic_name(self, symbolic_name):
    "Store SYMBOLIC_NAME as self.symbolic_name."
    self.symbolic_name = symbolic_name

  def set_motivating_revnum(self, revnum):
    "Store REVNUM as self.motivating_revnum."
    self.motivating_revnum = revnum

  def set_author(self, author):
    """Make AUTHOR (a locally-encoded string) the author of this
    SVNCommit.  There is no other way to set the author."""
    self._author = author

  def set_log_msg(self, msg):
    """Make MSG (a locally-encoded string) the log message of this
    SVNCommit.  There is no other way to set the log message."""
    self._log_msg = msg

  def set_date(self, date):
    """Make DATE (an integer) the date of this SVNCommit.

    self.add_revision() keeps the date up to date from the
    CVSRevisions it is given, so calling this may be unnecessary; and
    a date set here may still be overwritten by a later call to
    self.add_revision()."""
    self._max_date = date

  def get_date(self):
    """Return the date of this SVNCommit as an integer."""
    return self._max_date

  def get_revprops(self):
    """Return a dictionary with the Subversion revprops 'svn:author',
    'svn:log' and 'svn:date' of this SVNCommit.

    The author (unless it is None) and the log message are passed
    through to_utf8().  If that raises UnicodeError, warnings are
    logged and the unconverted author and log message are returned
    instead."""
    svn_date = format_date(self._max_date)
    try:
      if self._author is None:
        utf8_author = None
      else:
        utf8_author = to_utf8(self._author)
      utf8_log = to_utf8(self.get_log_msg())
    except UnicodeError:
      Log().write(LOG_WARN, '%s: problem encoding author or log message:'
                  % warning_prefix)
      Log().write(LOG_WARN, "  author: '%s'" % self._author)
      Log().write(LOG_WARN, "  log:    '%s'" % self.get_log_msg().rstrip())
      Log().write(LOG_WARN, "  date:   '%s'" % svn_date)
      Log().write(LOG_WARN,
                  "(subversion rev %s)  Related files:" % self.revnum)
      for c_rev in self.cvs_revs:
        Log().write(LOG_WARN, " ", c_rev.fname)

      Log().write(LOG_WARN, "Consider rerunning with (for example)",
                  "'--encoding=latin1'.\n")
      # Falling back to the original data (in an unknown encoding) is
      # better than either quitting or recording nothing at all.
      return { 'svn:author' : self._author,
               'svn:log'    : self.get_log_msg(),
               'svn:date'   : svn_date }

    return { 'svn:author' : utf8_author,
             'svn:log'    : utf8_log,
             'svn:date'   : svn_date }

  def add_revision(self, cvs_rev):
    """Append CVS_REV to self.cvs_revs, and move the date of this
    SVNCommit forward to CVS_REV's timestamp if that is later."""
    self.cvs_revs.append(cvs_rev)
    stamp = cvs_rev.timestamp
    if self._max_date < stamp:
      self._max_date = stamp

  def _is_primary_commit(self):
    """Return true if this is a primary SVNCommit (one with neither a
    symbolic name nor a motivating revnum), false otherwise."""
    return not self.symbolic_name and not self.motivating_revnum

  def flush(self):
    """Log the creation of this revision and hand its data to the
    persistence manager: always the CVSRevisions, the motivating
    revnum if there is one, and (for a non-primary commit) the
    symbolic name and the date."""
    Log().write(LOG_NORMAL, "Creating Subversion r%d (%s)"
                % (self.revnum, self._description))
    pm = Ctx()._persistence_manager
    pm.set_cvs_revs(self.revnum, self.cvs_revs)

    if self.motivating_revnum is not None:
      pm.set_motivating_revnum(self.revnum, self.motivating_revnum)

    # Only a non-primary commit has its date and/or symbolic_name
    # stored.
    if not self._is_primary_commit():
      pm.set_name_and_date(self.revnum, self.symbolic_name, self._max_date)

  def __str__(self):
    """Return a human-readable, multi-line description of this
    SVNCommit.  The description is not meant to be machine-parseable
    (although nobody is going to stop you if you try!)"""
    pieces = [ "SVNCommit #: " + str(self.revnum) + "\n" ]
    if self.symbolic_name:
      pieces.append("   symbolic name: "
                    + _clean_symbolic_name(self.symbolic_name) + "\n")
    else:
      pieces.append("   NO symbolic name\n")
    pieces.append("   debug description: " + self._description + "\n")
    pieces.append("   cvs_revs:\n")
    for c_rev in self.cvs_revs:
      pieces.append("     " + c_rev.unique_key() + "\n")
    return "".join(pieces)

  def get_log_msg(self):
    """Return the log message to use for this commit: a manufactured
    one if a symbolic name or a motivating revnum is set (checked in
    that order), and the message stored with set_log_msg()
    otherwise."""
    if self.symbolic_name is not None:
      return self._log_msg_for_symbolic_name_commit()
    if self.motivating_revnum is not None:
      return self._log_msg_for_default_branch_commit()
    return self._log_msg

  def _log_msg_for_symbolic_name_commit(self):
    """Return the log message for a manufactured commit that fills
    self.symbolic_name.  The message speaks of a tag if self.is_tag is
    true and of a branch otherwise."""
    if self.is_tag:
      kind = 'tag'
    else:
      kind = 'branch'

    # Poor man's line wrapping: a cleaned name of 13 or more
    # characters is moved to a line of its own.
    clean_name = _clean_symbolic_name(self.symbolic_name)
    if len(clean_name) >= 13:
      separator = '\n'
    else:
      separator = ' '

    return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
           % (kind, separator, clean_name)

  def _log_msg_for_default_branch_commit(self):
    """Return the log message for a manufactured commit that
    synchronizes a non-trunk default branch with trunk."""
    template = ('This commit was generated by cvs2svn to compensate '
                'for changes in r%d,\n'
                'which included commits to RCS files with non-trunk '
                'default branches.\n')
    return template % self.motivating_revnum
2603
class CVSRevisionAggregator:
  """Groups CVSRevisions into CVSCommits, each of which represents at
  least one SVNCommit."""

  def __init__(self):
    """Open the databases that are read during aggregation, set up
    empty bookkeeping, and install a SymbolingsLogger, a
    PersistenceManager and the default branches database on Ctx()."""
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
                                   DB_OPEN_READ)

    # The CVSCommits that may still receive revisions, by key.
    self.cvs_commits = {}

    # The symbols (as keys) that are waiting to be closed.
    self.pending_symbols = {}

    # The symbols for which the last CVSRevision that is a source for
    # the symbol has already been seen.  The final fill of such a
    # symbol has been done, so it never needs to be filled again.
    self.done_symbols = [ ]

    # The most recently created primary svn_commit object.  It is
    # kept only for its date, which is given to the SVNCommits that
    # self.attempt_to_commit_symbols() creates.
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                           DB_OPEN_READ)

  def process_revision(self, c_rev):
    """Add the CVSRevision C_REV to the pending CVSCommit with its
    digest (creating that CVSCommit if necessary), after processing
    every pending CVSCommit that C_REV shows to be complete."""
    # Every new revision is an occasion to look through the commits
    # accumulated so far for ones that can be processed now.
    ready_queue = [ ]
    for digest_key, cvs_commit in self.cvs_commits.items():
      if cvs_commit.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
        # Too old for C_REV to be a part of it.
        ready_queue.append(cvs_commit)
        del self.cvs_commits[digest_key]
      elif cvs_commit.has_file(c_rev.fname):
        # The inbound revision is on a file that the pending commit
        # already contains, so the pending commit is closed to
        # further changes.  It is not flushed, though, because other
        # pending commits may be dated before it.
        # ### ISSUE: the has_file() check is not optimal.  It fixes
        # the dataloss bug where revisions got lost if checked in too
        # quickly, but it can also break commits apart.  The correct
        # fix would track the dependencies between change sets and
        # commit them in the proper order.
        #
        # Park the commit under a key that is not in use yet (and
        # that is not a digest, so nothing more is added to it).
        parked_key = digest_key + '-'
        while self.cvs_commits.has_key(parked_key):
          parked_key = parked_key + '-'
        self.cvs_commits[parked_key] = cvs_commit
        del self.cvs_commits[digest_key]

    # File the revision with the still-available commit that has its
    # digest, creating that commit first if there is none.
    digest = c_rev.digest
    if not self.cvs_commits.has_key(digest):
      author, log = self.metadata_db[digest]
      self.cvs_commits[digest] = CVSCommit(digest, author, log)
    self.cvs_commits[digest].add_revision(c_rev)

    # Whatever is in the ready_queue must be processed now, because
    # this latest revision cannot possibly belong to any of it.  The
    # commits are processed in time order.
    ready_queue.sort()

    # attempt_to_commit_symbols must run for this c_rev even if no
    # commit is ready.
    if not ready_queue:
      self.attempt_to_commit_symbols(ready_queue, c_rev)

    while ready_queue:
      cvs_commit = ready_queue[0]
      self.latest_primary_svn_commit \
          = cvs_commit.process_revisions(self.done_symbols)
      del ready_queue[0]
      self.attempt_to_commit_symbols(ready_queue, c_rev)

  def flush(self):
    """Commit whatever is left in self.cvs_commits.  Then (unless this
    is a trunk-only conversion) tell the SymbolingsLogger that all
    commits are done by closing it."""
    leftovers = [ (cvs_commit, key)
                  for key, cvs_commit in self.cvs_commits.items() ]
    leftovers.sort()

    for cvs_commit, key in leftovers:
      self.latest_primary_svn_commit = \
        cvs_commit.process_revisions(self.done_symbols)
      del self.cvs_commits[key]
      self.attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
    """Generate one SVNCommit for each symbol in self.pending_symbols
    that has no opening CVSRevision in either QUEUED_COMMITS or
    self.cvs_commits.values().  Each symbol handled this way is moved
    from self.pending_symbols to self.done_symbols.

    If C_REV is given (and this is not a trunk-only conversion), the
    symbols that self.last_revs_db lists for C_REV are added to
    self.pending_symbols first; those are the symbols for which C_REV
    is the last *source* CVSRevision."""
    if c_rev and not Ctx().trunk_only:
      for sym in self.last_revs_db.get(c_rev.unique_key(), []):
        self.pending_symbols[sym] = None

    # Find the pending symbols that still have *source* CVSRevisions
    # in a commit that has not been processed yet.
    still_open = {}
    for sym in self.pending_symbols.keys():
      for cvs_commit in self.cvs_commits.values() + queued_commits:
        if cvs_commit.opens_symbolic_name(sym):
          still_open[sym] = None
          break

    # The symbols are handled in sorted order, so that the outcome
    # does not depend on the order in which the dict hands its keys
    # back.  That way the tests get the same results on all
    # platforms.
    candidates = self.pending_symbols.keys()
    candidates.sort()
    for sym in candidates:
      if not still_open.has_key(sym):
        closing_commit = SVNCommit("closing tag/branch '%s'" % sym)
        closing_commit.set_symbolic_name(sym)
        closing_commit.set_date(self.latest_primary_svn_commit.get_date())
        closing_commit.flush()
        self.done_symbols.append(sym)
        del self.pending_symbols[sym]
2745
2746
class SymbolingsReader:
  """An interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  It does the heavy lifting of finding and
  returning the correct opening and closing Subversion revision
  numbers for a given symbolic name."""

  def __init__(self):
    """Open SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and copy the
    offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    # The offsets database is really small and is read from and
    # written to a fair bit, so it is held in a plain dictionary.
    self.offsets = { }
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    for symbol in offsets_db.db.keys():
      self.offsets[symbol] = offsets_db[symbol]

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Return a new SymbolicNameFillingGuide for SYMBOLIC_NAME, built
    from the openings and closings recorded for it up to and including
    revision SVN_REVNUM.

    If an opening is encountered in this fill while the corresponding
    closing takes place later than SVN_REVNUM, the closing is not
    passed to the SymbolicNameFillingGuide in this fill (and it is
    discarded when it is encountered in a later fill).  That is
    perfectly fine: a valid fill is possible without the closing, and
    the policy is to fill whatever can be filled as soon as
    possible."""
    openings_closings_map = OpeningsClosingsMap(symbolic_name)

    # There may be no offset for the symbol: a branch can start with
    # a file that was added on a branch.
    if not self.offsets.has_key(symbolic_name):
      return SymbolicNameFillingGuide(openings_closings_map)

    # Continue reading where the records for symbolic_name start (or
    # where the previous fill of it stopped).
    symbolings = self.symbolings
    symbolings.seek(self.offsets[symbolic_name])

    while 1:
      line_start = symbolings.tell()
      line = symbolings.readline().rstrip()
      if not line:
        break
      name, revnum, kind, branch_name, cvs_path = line.split(" ", 4)
      # A branch name of '*' denotes a path on trunk.
      if branch_name == '*':
        svn_path = Ctx().project.make_trunk_path(cvs_path)
      else:
        svn_path = Ctx().project.make_branch_path(branch_name, cvs_path)
      revnum = int(revnum)
      # Stop at the first record that is too new for this fill or
      # that belongs to a different symbol.
      if revnum > svn_revnum or name != symbolic_name:
        break
      openings_closings_map.register(svn_path, revnum, kind)

    # If anything that was read got used, remember the offset of the
    # beginning of the line read last, so that the next fill of this
    # symbol resumes there.
    if not openings_closings_map.is_empty():
      self.offsets[symbolic_name] = line_start

    return SymbolicNameFillingGuide(openings_closings_map)
2807
2808
class SvnRevisionRange:
  """The range of Subversion revision numbers from which a path can
  be copied.

  self.opening_revnum is the number of the earliest such revision.
  self.closing_revnum is one higher than the number of the last such
  revision, or None if no closing has been registered."""

  def __init__(self, opening_revnum):
    """Create a range that opens at OPENING_REVNUM and has no closing
    yet."""
    self.closing_revnum = None
    self.opening_revnum = opening_revnum

  def add_closing(self, closing_revnum):
    """Register CLOSING_REVNUM as the closing of this range, unless a
    closing was registered before.

    With a non-trunk default branch there may be several closings;
    only the first one encountered counts."""
    if self.closing_revnum is not None:
      return
    self.closing_revnum = closing_revnum

  def __str__(self):
    """Return '[OPENING:CLOSING]', or '[OPENING:]' if there is no
    closing."""
    if self.closing_revnum is not None:
      return '[%d:%d]' % (self.opening_revnum, self.closing_revnum)
    return '[%d:]' % (self.opening_revnum,)
2831
2832
class OpeningsClosingsMap:
  """A dictionary of the openings and closings for a symbolic name in
  the current SVNCommit.

  The user should call self.register() for each opening and closing,
  and can then retrieve what was collected with self.get_things()."""

  def __init__(self, symbolic_name):
    """Create an empty map for SYMBOLIC_NAME, ready to receive
    openings and closings."""
    # Maps each SVN_PATH to an SvnRevisionRange object.
    self.things = { }

    self.name = symbolic_name

  def register(self, svn_path, svn_revnum, type):
    """Register an opening or a closing revision for this symbolic
    name.

    SVN_PATH is the source path that needs to be copied into the
    symbolic name.  SVN_REVNUM is either the first svn revision number
    that can be copied from (the opening), or the last (not inclusive)
    svn revision number that can be copied from (the closing).  TYPE
    tells which of the two it is: OPENING or CLOSING.

    An opening always (re)starts the range for SVN_PATH.  A closing
    only has an effect if the opening for the same SVN_PATH was
    registered before it; otherwise it is discarded.

    Not every opening needs to be followed by a closing."""
    if type == OPENING:
      self.things[svn_path] = SvnRevisionRange(svn_revnum)
      return
    if type != CLOSING:
      return
    # A closing without a registered opening is dropped.
    if self.things.has_key(svn_path):
      self.things[svn_path].add_closing(svn_revnum)

  def is_empty(self):
    """Return true if no opening has been recorded in this map so far,
    false otherwise."""
    return len(self.things) == 0

  def get_things(self):
    """Return the (svn_path, SvnRevisionRange) pairs of all svn_paths
    recorded in this map."""
    return self.things.items()
2884
2885
class SymbolicNameFillingGuide:
  """A node tree representing the source paths to be copied to fill
  the symbolic name self.name in the current SVNCommit.

  self._node_tree is the root of the directory tree, in the form {
  path_component : subnode }.  Leaf nodes are instances of
  SvnRevisionRange.  Intermediate (directory) nodes are dictionaries
  mapping relative names to subnodes.

  By walking self._node_tree and calling self.get_best_revnum() on
  each node, the caller can determine what subversion revision number
  to copy the path corresponding to that node from.  self._node_tree
  should be treated as read-only.

  The caller can then descend to sub-nodes to see if their "best
  revnum" differs from their parents' and if it does, take appropriate
  actions to "patch up" the subtrees."""

  def __init__(self, openings_closings_map):
    """Initialize a SymbolicNameFillingGuide for the symbolic name of
    OPENINGS_CLOSINGS_MAP and store into it the openings and closings
    from that map."""

    self.name = openings_closings_map.name

    # The root of our node tree, as a map { path_component : subnode }.
    self._node_tree = { }

    for svn_path, svn_revision_range in openings_closings_map.get_things():
      (head, tail) = _path_split(svn_path)
      self._get_node_for_path(head)[tail] = svn_revision_range

    # For debugging, uncomment the following line:
    #self.print_node_tree(self._node_tree)

  def _get_node_for_path(self, svn_path):
    """Return the directory node (a dictionary) for SVN_PATH, creating
    new empty nodes along the way as needed."""
    # Walk down the path, one component at a time.
    node = self._node_tree
    for component in svn_path.split('/'):
      node = node.setdefault(component, { })

    return node

  def get_best_revnum(self, node, preferred_revnum):
    """Determine the best subversion revision number to use when
    copying the source tree beginning at NODE.

    Return a tuple (REVNUM, SCORE).  REVNUM is the chosen subversion
    revision number and SCORE is the highest score that
    score_revisions() (below) assigned to any revision.

    PREFERRED_REVNUM is passed to best_rev() (below); it wins over
    older revisions when it shares the highest score.  It may be None
    if the caller has no preference.

    Raise FatalError if no revision to copy from could be found."""

    def score_revisions(svn_revision_ranges):
      """Return a list of revisions and scores based on
      SVN_REVISION_RANGES.  The returned list looks like:

         [(REV1 SCORE1), (REV2 SCORE2), ...]

      where the tuples are sorted by revision number.
      SVN_REVISION_RANGES is a list of SvnRevisionRange objects.

      For each svn revision that appears as either an opening_revnum
      or closing_revnum for one of the svn_revision_ranges, output a
      tuple indicating how many of the SvnRevisionRanges include that
      svn_revision in its range.  A score thus indicates that copying
      the corresponding revision (or any following revision up to the
      next revision in the list) of the object in question would yield
      that many correct paths at or underneath the object.  There may
      be other paths underneath it which are not correct and would
      need to be deleted or recopied; those can only be detected by
      descending and examining their scores.

      If SVN_REVISION_RANGES is empty, return the empty list."""
      openings = [ x.opening_revnum
                   for x in svn_revision_ranges ]
      closings = [ x.closing_revnum
                   for x in svn_revision_ranges
                   if x.closing_revnum is not None ]

      # First look for easy out.
      if not openings:
        return []

      # Create a list with both openings (which increment the total)
      # and closings (which decrement the total):
      things = [(rev,1) for rev in openings] + [(rev,-1) for rev in closings]
      # Sort by revision number:
      things.sort()
      # Initialize output list with zeroth element of things.  This
      # element must exist, because it was already verified that
      # openings is not empty.
      scores = [ things[0] ]
      total = scores[-1][1]
      for (rev, change) in things[1:]:
        total += change
        if rev == scores[-1][0]:
          # Same revision as last entry; modify last entry:
          scores[-1] = (rev, total)
        else:
          # Previously-unseen revision; create new entry:
          scores.append((rev, total))
      return scores

    def best_rev(scores, preferred_rev):
      """Return a tuple (REV, MAX_SCORE) holding the revision with the
      highest score from SCORES, a list returned by score_revisions(),
      and that highest score.  When the maximum score is shared by
      multiple revisions, the oldest revision is selected, unless
      PREFERRED_REV is one of the possibilities, in which case, it is
      selected.  REV is SVN_INVALID_REVNUM if no revision has a
      positive score."""
      max_score = 0
      preferred_rev_score = -1
      rev = SVN_INVALID_REVNUM
      if preferred_rev is None:
        # Comparison order of different types is arbitrary. Do not
        # expect None to compare less than int values below.
        # In Python 2.3 None compares with ints like negative infinity.
        # In Python 2.0 None compares with ints like positive infinity.
        preferred_rev = SVN_INVALID_REVNUM
      for revnum, count in scores:
        if count > max_score:
          max_score = count
          rev = revnum
        # The score that applies to PREFERRED_REV is the score of the
        # last listed revision that is not younger than it.
        if revnum <= preferred_rev:
          preferred_rev_score = count
      if preferred_rev_score == max_score:
        rev = preferred_rev
      return rev, max_score

    # Aggregate openings and closings from the rev tree
    svn_revision_ranges = self._list_revnums(node)

    # Score the lists
    scores = score_revisions(svn_revision_ranges)

    revnum, max_score = best_rev(scores, preferred_revnum)

    if revnum == SVN_INVALID_REVNUM:
      raise FatalError("failed to find a revision "
                       "to copy from when copying %s" % self.name)
    return revnum, max_score

  def _list_revnums(self, node):
    """Return a list of all the SvnRevisionRanges (including
    duplicates) for all leaf nodes at and under NODE."""

    if isinstance(node, SvnRevisionRange):
      # It is a leaf node.
      return [ node ]

    # It is an intermediate node; collect from everything below it.
    revnums = []
    for subnode in node.values():
      revnums.extend(self._list_revnums(subnode))
    return revnums

  def get_sources(self):
    """Return the list of sources (FillSource instances) for this
    symbolic name.

    The Project instance defines what are legitimate sources.  Raise
    FatalError if a change occurred outside of the source
    directories."""

    return self._get_sub_sources('', self._node_tree)

  def _get_sub_sources(self, start_svn_path, start_node):
    """Return the list of sources for this symbolic name, starting the
    search at path START_SVN_PATH, which is node START_NODE.  This is
    a helper method, called by get_sources() (see).

    Raise FatalError if a leaf node is reached before any legitimate
    source directory has been found."""

    project = Ctx().project
    if isinstance(start_node, SvnRevisionRange):
      # This implies that a change was found outside of the
      # legitimate sources.  This should never happen.
      raise FatalError("found a change outside of the legitimate sources "
                       "when filling %s (at path '%s')"
                       % (self.name, start_svn_path))

    if project.is_source(start_svn_path):
      # This is a legitimate source.  Add it to list.
      return [ FillSource(start_svn_path, start_node) ]

    # This is a directory that is not a legitimate source.  (That's
    # OK because it hasn't changed directly.)  But directories
    # within it have been changed, so we need to search recursively
    # to find their enclosing sources.
    sources = []
    for entry, node in start_node.items():
      svn_path = _path_join(start_svn_path, entry)
      sources.extend(self._get_sub_sources(svn_path, node))
    return sources

  def print_node_tree(self, node, name='/', indent_depth=0):
    """For debugging purposes.  Prints all nodes in TREE that are
    rooted at NODE.  NAME is the label printed for NODE itself, and
    INDENT_DEPTH is used to indent the output of recursive calls."""
    # Each line is passed to print as one parenthesized string so that
    # the statement produces the same output with or without the
    # print statement.
    if not indent_depth:
      print("TREE " + "=" * 75)
    indent = " " * (indent_depth * 2)
    if isinstance(node, SvnRevisionRange):
      print("TREE: %s %s %s" % (indent, name, node))
    else:
      print("TREE: %s %s" % (indent, name))
      for key, value in node.items():
        self.print_node_tree(value, key, (indent_depth + 1))
3092
3093
class FillSource:
  """One candidate source that the symbol filler in
  SVNRepositoryMirror may copy from: a source path prefix together
  with the filling-guide node found at that path."""
  def __init__(self, prefix, node):
    """Create a fill source for PREFIX and NODE.  The source starts
    out unscored: both its score and its revnum are None."""
    self.prefix, self.node = prefix, node
    self.score = self.revnum = None

  def set_score(self, score, revnum):
    """Record SCORE and REVNUM for this source."""
    self.score, self.revnum = score, revnum

  def __cmp__(self, other):
    """Compare two FillSources such that sorting puts the highest
    score first.  Raise TypeError if either source is unscored."""
    unscored = self.score is None or other.score is None
    if unscored:
      raise TypeError('Tried to compare unscored FillSource')
    # The operands are swapped on purpose: descending score order.
    return cmp(other.score, self.score)
3115
3116
3117 class SVNRepositoryMirror:
3118   """Mirror a Subversion Repository as it is constructed, one
3119   SVNCommit at a time.  The mirror is skeletal; it does not contain
3120   file contents.  The creation of a dumpfile or Subversion repository
3121   is handled by delegates.  See self.add_delegate method for how to
3122   set delegates.
3123
3124   The structure of the repository is kept in two databases and one
3125   hash.  The revs_db database maps revisions to root node keys, and
3126   the nodes_db database maps node keys to nodes.  A node is a hash
3127   from directory names to keys.  Both the revs_db and the nodes_db are
3128   stored on disk and each access is expensive.
3129
3130   The nodes_db database only has the keys for old revisions.  The
3131   revision that is being contructed is kept in memory in the new_nodes
3132   hash which is cheap to access.
3133
3134   You must invoke _start_commit between SVNCommits.
3135
3136   *** WARNING *** All path arguments to methods in this class CANNOT
3137       have leading or trailing slashes.
3138   """
3139
3140   class SVNRepositoryMirrorPathExistsError(Exception):
3141     """Exception raised if an attempt is made to add a path to the
3142     repository mirror and that path already exists in the youngest
3143     revision of the repository."""
3144     pass
3145
3146   class SVNRepositoryMirrorUnexpectedOperationError(Exception):
3147     """Exception raised if a CVSRevision is found to have an unexpected
3148     operation (OP) value."""
3149     pass
3150
3151   class SVNRepositoryMirrorInvalidFillOperationError(Exception):
3152     """Exception raised if an empty SymbolicNameFillingGuide is returned
3153     during a fill where the branch in question already exists."""
3154     pass
3155
  def __init__(self):
    """Set up the SVNRepositoryMirror and prepare it for SVNCommits.

    Opens the revisions and nodes databases with DB_OPEN_NEW under the
    temp() names of SVN_MIRROR_REVISIONS_DB and SVN_MIRROR_NODES_DB,
    and registers both of those names with Cleanup() for pass8.
    Unless Ctx().trunk_only is set, also opens the tags database for
    reading and creates a SymbolingsReader; both are used when filling
    symbolic names."""
    # No delegates yet; see self.add_delegate.
    self.delegates = [ ]

    # This corresponds to the 'revisions' table in a Subversion fs.
    self.revs_db = SDatabase(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track metadata, not file contents.)
    self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)

    # Start at revision 0 without a root node.  It will be created
    # by _open_writable_root_node.
    self.youngest = 0
    self.new_root_key = None
    # Nodes created in the revision under construction, keyed by node
    # key; _end_commit copies them into self.nodes_db.
    self.new_nodes = { }

    # Tags and symbolings are only consulted by _fill_symbolic_name.
    if not Ctx().trunk_only:
      ###PERF IMPT: Suck this into memory.
      self.tags_db = TagsDatabase(DB_OPEN_READ)
      self.symbolings_reader = SymbolingsReader()
3180
3181   def _initialize_repository(self, date):
3182     """Initialize the repository by creating the directories for
3183     trunk, tags, and branches.  This method should only be called
3184     after all delegates are added to the repository mirror."""
3185     # Make a 'fake' SVNCommit so we can take advantage of the revprops
3186     # magic therein
3187     svn_commit = SVNCommit("Initialization", 1)
3188     svn_commit.set_date(date)
3189     svn_commit.set_log_msg("New repository initialized by cvs2svn.")
3190
3191     self._start_commit(svn_commit)
3192     self._mkdir(Ctx().project.trunk_path)
3193     if not Ctx().trunk_only:
3194       self._mkdir(Ctx().project.branches_path)
3195       self._mkdir(Ctx().project.tags_path)
3196
3197   def _start_commit(self, svn_commit):
3198     """Start a new commit."""
3199     if self.youngest > 0:
3200       self._end_commit()
3201
3202     self.youngest = svn_commit.revnum
3203     self.new_root_key = None
3204     self.new_nodes = { }
3205
3206     self._invoke_delegates('start_commit', svn_commit)
3207
3208   def _end_commit(self):
3209     """Called at the end of each commit.  This method copies the newly
3210     created nodes to the on-disk nodes db."""
3211     if self.new_root_key is None:
3212       # No changes were made in this revision, so we make the root node
3213       # of the new revision be the same as the last one.
3214       self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
3215     else:
3216       self.revs_db[str(self.youngest)] = self.new_root_key
3217       # Copy the new nodes to the nodes_db
3218       for key, value in self.new_nodes.items():
3219         self.nodes_db[key] = value
3220
3221   def _get_node(self, key):
3222     """Returns the node contents for KEY which may refer to either
3223     self.nodes_db or self.new_nodes."""
3224     if self.new_nodes.has_key(key):
3225       return self.new_nodes[key]
3226     else:
3227       return self.nodes_db[key]
3228
3229   def _open_readonly_node(self, path, revnum):
3230     """Open a readonly node for PATH at revision REVNUM.  Returns the
3231     node key and node contents if the path exists, else (None, None)."""
3232     # Get the root key
3233     if revnum == self.youngest:
3234       if self.new_root_key is None:
3235         node_key = self.revs_db[str(self.youngest - 1)]
3236       else:
3237         node_key = self.new_root_key
3238     else:
3239       node_key = self.revs_db[str(revnum)]
3240
3241     for component in path.split('/'):
3242       node_contents = self._get_node(node_key)
3243       node_key = node_contents.get(component, None)
3244       if node_key is None:
3245         return None
3246
3247     return node_key
3248
3249   def _open_writable_root_node(self):
3250     """Open a writable root node.  The current root node is returned
3251     immeditely if it is already writable.  If not, create a new one by
3252     copying the contents of the root node of the previous version."""
3253     if self.new_root_key is not None:
3254       return self.new_root_key, self.new_nodes[self.new_root_key]
3255
3256     if self.youngest < 2:
3257       new_contents = { }
3258     else:
3259       new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
3260     self.new_root_key = gen_key()
3261     self.new_nodes = { self.new_root_key: new_contents }
3262
3263     return self.new_root_key, new_contents
3264
3265   def _open_writable_node(self, svn_path, create):
3266     """Open a writable node for the path SVN_PATH, creating SVN_PATH
3267     and any missing directories if CREATE is True."""
3268     parent_key, parent_contents = self._open_writable_root_node()
3269
3270     # Walk up the path, one node at a time.
3271     path_so_far = None
3272     components = svn_path.split('/')
3273     for i in range(len(components)):
3274       component = components[i]
3275       path_so_far = _path_join(path_so_far, component)
3276       this_key = parent_contents.get(component, None)
3277       if this_key is not None:
3278         # The component exists.
3279         this_contents = self.new_nodes.get(this_key, None)
3280         if this_contents is None:
3281           # Suck the node from the nodes_db, but update the key
3282           this_contents = self.nodes_db[this_key]
3283           this_key = gen_key()
3284           self.new_nodes[this_key] = this_contents
3285           parent_contents[component] = this_key
3286       elif create:
3287         # The component does not exists, so we create it.
3288         this_contents = { }
3289         this_key = gen_key()
3290         self.new_nodes[this_key] = this_contents
3291         parent_contents[component] = this_key
3292         if i < len(components) - 1:
3293           self._invoke_delegates('mkdir', path_so_far)
3294       else:
3295         # The component does not exists and we are not instructed to
3296         # create it, so we give up.
3297         return None, None
3298
3299       parent_key = this_key
3300       parent_contents = this_contents
3301
3302     return this_key, this_contents
3303
3304   def _path_exists(self, path):
3305     """If PATH exists in self.youngest of the svn repository mirror,
3306     return true, else return None.
3307
3308     PATH must not start with '/'."""
3309     return self._open_readonly_node(path, self.youngest) is not None
3310
3311   def _fast_delete_path(self, parent_path, parent_contents, component):
3312     """Delete COMPONENT from the parent direcory PARENT_PATH with the
3313     contents PARENT_CONTENTS.  Do nothing if COMPONENT does not exist
3314     in PARENT_CONTENTS."""
3315     if parent_contents.has_key(component):
3316       del parent_contents[component]
3317       self._invoke_delegates('delete_path',
3318                              _path_join(parent_path, component))
3319
3320   def _delete_path(self, svn_path, should_prune=False):
3321     """Delete PATH from the tree.  If SHOULD_PRUNE is true, then delete
3322     all ancestor directories that are made empty when SVN_PATH is deleted.
3323     In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
3324
3325     NOTE: This function ignores requests to delete the root directory
3326     or any directory for which Ctx().project.is_unremovable() returns
3327     True, either directly or by pruning."""
3328
3329     if svn_path == '' or Ctx().project.is_unremovable(svn_path):
3330       return
3331
3332     (parent_path, entry,) = _path_split(svn_path)
3333     if parent_path:
3334       parent_key, parent_contents = \
3335           self._open_writable_node(parent_path, False)
3336     else:
3337       parent_key, parent_contents = self._open_writable_root_node()
3338
3339     if parent_key is not None:
3340       self._fast_delete_path(parent_path, parent_contents, entry)
3341       # The following recursion makes pruning an O(n^2) operation in the
3342       # worst case (where n is the depth of SVN_PATH), but the worst case
3343       # is probably rare, and the constant cost is pretty low.  Another
3344       # drawback is that we issue a delete for each path and not just
3345       # a single delete for the topmost directory pruned.
3346       if should_prune and len(parent_contents) == 0:
3347         self._delete_path(parent_path, True)
3348
3349   def _mkdir(self, path):
3350     """Create PATH in the repository mirror at the youngest revision."""
3351     self._open_writable_node(path, True)
3352     self._invoke_delegates('mkdir', path)
3353
3354   def _change_path(self, cvs_rev):
3355     """Register a change in self.youngest for the CVS_REV's svn_path
3356     in the repository mirror."""
3357     # We do not have to update the nodes because our mirror is only
3358     # concerned with the presence or absence of paths, and a file
3359     # content change does not cause any path changes.
3360     self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))
3361
3362   def _add_path(self, cvs_rev):
3363     """Add the CVS_REV's svn_path to the repository mirror."""
3364     self._open_writable_node(cvs_rev.svn_path, True)
3365     self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))
3366
3367   def _copy_path(self, src_path, dest_path, src_revnum):
3368     """Copy SRC_PATH at subversion revision number SRC_REVNUM to
3369     DEST_PATH. In the youngest revision of the repository, DEST_PATH's
3370     parent *must* exist, but DEST_PATH *cannot* exist.
3371
3372     Return the node key and the contents of the new node at DEST_PATH
3373     as a dictionary."""
3374     # get the contents of the node of our src_path
3375     src_key = self._open_readonly_node(src_path, src_revnum)
3376     src_contents = self._get_node(src_key)
3377
3378     # Get the parent path and the base path of the dest_path
3379     (dest_parent, dest_basename,) = _path_split(dest_path)
3380     dest_parent_key, dest_parent_contents = \
3381                    self._open_writable_node(dest_parent, False)
3382
3383     if dest_parent_contents.has_key(dest_basename):
3384       msg = "Attempt to add path '%s' to repository mirror " % dest_path
3385       msg = msg + "when it already exists in the mirror."
3386       raise self.SVNRepositoryMirrorPathExistsError, msg
3387
3388     dest_parent_contents[dest_basename] = src_key
3389     self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)
3390
3391     # Yes sir, src_key and src_contents are also the contents of the
3392     # destination.  This is a cheap copy, remember!  :-)
3393     return src_key, src_contents
3394
3395   def _fill_symbolic_name(self, svn_commit):
3396     """Performs all copies necessary to create as much of the the tag
3397     or branch SVN_COMMIT.symbolic_name as possible given the current
3398     revision of the repository mirror.
3399
3400     The symbolic name is guaranteed to exist in the Subversion
3401     repository by the end of this call, even if there are no paths
3402     under it."""
3403     symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
3404         svn_commit.symbolic_name, self.youngest)
3405     # Get the list of sources for the symbolic name.
3406     sources = symbol_fill.get_sources()
3407
3408     if sources:
3409       if self.tags_db.has_key(svn_commit.symbolic_name):
3410         dest_prefix = Ctx().project.get_tag_path(svn_commit.symbolic_name)
3411       else:
3412         dest_prefix = Ctx().project.get_branch_path(svn_commit.symbolic_name)
3413
3414       dest_key = self._open_writable_node(dest_prefix, False)[0]
3415       self._fill(symbol_fill, dest_prefix, dest_key, sources)
3416     else:
3417       # We can only get here for a branch whose first commit is an add
3418       # (as opposed to a copy).
3419       dest_path = Ctx().project.get_branch_path(symbol_fill.name)
3420       if not self._path_exists(dest_path):
3421         # If our symbol_fill was empty, that means that our first
3422         # commit on the branch was to a file added on the branch, and
3423         # that this is our first fill of that branch.
3424         #
3425         # This case is covered by test 16.
3426         #
3427         # ...we create the branch by copying trunk from the our
3428         # current revision number minus 1
3429         source_path = Ctx().project.trunk_path
3430         entries = self._copy_path(source_path, dest_path,
3431                                   svn_commit.revnum - 1)[1]
3432         # Now since we've just copied trunk to a branch that's
3433         # *supposed* to be empty, we delete any entries in the
3434         # copied directory.
3435         for entry in entries.keys():
3436           del_path = dest_path + '/' + entry
3437           # Delete but don't prune.
3438           self._delete_path(del_path)
3439       else:
3440         msg = "Error filling branch '" \
3441               + _clean_symbolic_name(symbol_fill.name) + "'.\n"
3442         msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
3443         msg = msg + "attempted to create a branch that already exists."
3444         raise self.SVNRepositoryMirrorInvalidFillOperationError, msg
3445
3446   def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
3447             path = None, parent_source_prefix = None,
3448             preferred_revnum = None, prune_ok = None):
3449     """Fill the tag or branch at DEST_PREFIX + PATH with items from
3450     SOURCES, and recurse into the child items.
3451
3452     DEST_PREFIX is the prefix of the destination directory, e.g.
3453     '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
3454     FillSource classes that are candidates to be copied to the
3455     destination.  DEST_KEY is the key in self.nodes_db to the
3456     destination, or None if the destination does not yet exist.
3457
3458     PATH is the path relative to DEST_PREFIX.  If PATH is None, we
3459     are at the top level, e.g. '/tags/my_tag'.
3460
3461     PARENT_SOURCE_PREFIX is the source prefix that was used to copy
3462     the parent directory, and PREFERRED_REVNUM is an int which is the
3463     source revision number that the caller (who may have copied KEY's
3464     parent) used to perform its copy.  If PREFERRED_REVNUM is None,
3465     then no revision is preferable to any other (which probably means
3466     that no copies have happened yet).
3467
3468     PRUNE_OK means that a copy has been made in this recursion, and
3469     it's safe to prune directories that are not in
3470     SYMBOL_FILL._node_tree, provided that said directory has a source
3471     prefix of one of the PARENT_SOURCE_PREFIX.
3472
3473     PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
3474     should only be passed in by recursive calls."""
3475     # Calculate scores and revnums for all sources
3476     for source in sources:
3477       src_revnum, score = symbol_fill.get_best_revnum(source.node,
3478                                                       preferred_revnum)
3479       source.set_score(score, src_revnum)
3480
3481     # Sort the sources in descending score order so that we will make
3482     # a eventual copy from the source with the highest score.
3483     sources.sort()
3484     copy_source = sources[0]
3485
3486     src_path = _path_join(copy_source.prefix, path)
3487     dest_path = _path_join(dest_prefix, path)
3488
3489     # Figure out if we shall copy to this destination and delete any
3490     # destination path that is in the way.
3491     do_copy = 0
3492     if dest_key is None:
3493       do_copy = 1
3494     elif prune_ok and (parent_source_prefix != copy_source.prefix or
3495                        copy_source.revnum != preferred_revnum):
3496       # We are about to replace the destination, so we need to remove
3497       # it before we perform the copy.
3498       self._delete_path(dest_path)
3499       do_copy = 1
3500
3501     if do_copy:
3502       dest_key, dest_entries = self._copy_path(src_path, dest_path,
3503                                                copy_source.revnum)
3504       prune_ok = 1
3505     else:
3506       dest_entries = self._get_node(dest_key)
3507
3508     # Create the SRC_ENTRIES hash from SOURCES.  The keys are path
3509     # elements and the values are lists of FillSource classes where
3510     # this path element exists.
3511     src_entries = {}
3512     for source in sources:
3513       if isinstance(source.node, SvnRevisionRange):
3514         continue
3515       for entry, node in source.node.items():
3516         src_entries.setdefault(entry, []).append(
3517             FillSource(source.prefix, node))
3518
3519     if prune_ok:
3520       # Delete the entries in DEST_ENTRIES that are not in src_entries.
3521       delete_list = [ ]
3522       for entry in dest_entries.keys():
3523         if not src_entries.has_key(entry):
3524           delete_list.append(entry)
3525       if delete_list:
3526         if not self.new_nodes.has_key(dest_key):
3527           dest_key, dest_entries = self._open_writable_node(dest_path, True)
3528         # Sort the delete list to get "diffable" dumpfiles.
3529         delete_list.sort()
3530         for entry in delete_list:
3531           self._fast_delete_path(dest_path, dest_entries, entry)
3532
3533     # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
3534     src_keys = src_entries.keys()
3535     src_keys.sort()
3536     for src_key in src_keys:
3537       next_dest_key = dest_entries.get(src_key, None)
3538       self._fill(symbol_fill, dest_prefix, next_dest_key,
3539                  src_entries[src_key], _path_join(path, src_key),
3540                  copy_source.prefix, sources[0].revnum, prune_ok)
3541
3542   def _synchronize_default_branch(self, svn_commit):
3543     """Propagate any changes that happened on a non-trunk default
3544     branch to the trunk of the repository.  See
3545     CVSCommit._post_commit() for details on why this is necessary."""
3546     for cvs_rev in svn_commit.cvs_revs:
3547       svn_trunk_path = Ctx().project.make_trunk_path(cvs_rev.cvs_path)
3548       if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
3549         if self._path_exists(svn_trunk_path):
3550           # Delete the path on trunk...
3551           self._delete_path(svn_trunk_path)
3552         # ...and copy over from branch
3553         self._copy_path(cvs_rev.svn_path, svn_trunk_path,
3554                         svn_commit.motivating_revnum)
3555       elif cvs_rev.op == OP_DELETE:
3556         # delete trunk path
3557         self._delete_path(svn_trunk_path)
3558       else:
3559         msg = ("Unknown CVSRevision operation '%s' in default branch sync."
3560                % cvs_rev.op)
3561         raise self.SVNRepositoryMirrorUnexpectedOperationError, msg
3562
3563   def commit(self, svn_commit):
3564     """Add an SVNCommit to the SVNRepository, incrementing the
3565     Repository revision number, and changing the repository.  Invoke
3566     the delegates' _start_commit() method."""
3567
3568     if svn_commit.revnum == 2:
3569       self._initialize_repository(svn_commit.get_date())
3570
3571     self._start_commit(svn_commit)
3572
3573     if svn_commit.symbolic_name:
3574       Log().write(LOG_VERBOSE, "Filling symbolic name:",
3575                   _clean_symbolic_name(svn_commit.symbolic_name))
3576       self._fill_symbolic_name(svn_commit)
3577     elif svn_commit.motivating_revnum:
3578       Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
3579                   % svn_commit.motivating_revnum)
3580       self._synchronize_default_branch(svn_commit)
3581     else: # This actually commits CVSRevisions
3582       if len(svn_commit.cvs_revs) > 1: plural = "s"
3583       else: plural = ""
3584       Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
3585                   % (len(svn_commit.cvs_revs), plural))
3586       for cvs_rev in svn_commit.cvs_revs:
3587         # See comment in CVSCommit._commit() for what this is all
3588         # about.  Note that although asking self._path_exists() is
3589         # somewhat expensive, we only do it if the first two (cheap)
3590         # tests succeed first.
3591         if not ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
3592                 and (cvs_rev.rev == "1.1.1.1")
3593                 and self._path_exists(cvs_rev.svn_path)):
3594           if cvs_rev.op == OP_ADD:
3595             self._add_path(cvs_rev)
3596           elif cvs_rev.op == OP_CHANGE:
3597             # Fix for Issue #74:
3598             #
3599             # Here's the scenario.  You have file FOO that is imported
3600             # on a non-trunk vendor branch.  So in r1.1 and r1.1.1.1,
3601             # the file exists.
3602             #
3603             # Moving forward in time, FOO is deleted on the default
3604             # branch (r1.1.1.2).  cvs2svn determines that this delete
3605             # also needs to happen on trunk, so FOO is deleted on
3606             # trunk.
3607             #
3608             # Along come r1.2, whose op is OP_CHANGE (because r1.1 is
3609             # not 'dead', we assume it's a change).  However, since
3610             # our trunk file has been deleted, svnadmin blows up--you
3611             # can't change a file that doesn't exist!
3612             #
3613             # Soooo... we just check the path, and if it doesn't
3614             # exist, we do an add... if the path does exist, it's
3615             # business as usual.
3616             if not self._path_exists(cvs_rev.svn_path):
3617               self._add_path(cvs_rev)
3618             else:
3619               self._change_path(cvs_rev)
3620
3621         if cvs_rev.op == OP_DELETE:
3622           self._delete_path(cvs_rev.svn_path, Ctx().prune)
3623
3624   def cleanup(self):
3625     """Callback for the Cleanup.register in self.__init__."""
3626     self.revs_db = None
3627     self.nodes_db = None
3628
3629   def add_delegate(self, delegate):
3630     """Adds DELEGATE to self.delegates.
3631
3632     For every delegate you add, as soon as SVNRepositoryMirror
3633     performs a repository action method, SVNRepositoryMirror will call
3634     the delegate's corresponding repository action method.  Multiple
3635     delegates will be called in the order that they are added.  See
3636     SVNRepositoryMirrorDelegate for more information."""
3637     self.delegates.append(delegate)
3638
3639   def _invoke_delegates(self, method, *args):
3640     """Iterate through each of our delegates, in the order that they
3641     were added, and call the delegate's method named METHOD with the
3642     arguments in ARGS."""
3643     for delegate in self.delegates:
3644       getattr(delegate, method)(*args)
3645
  def finish(self):
    """Finish the conversion: end the current commit with
    self._end_commit(), call the finish method of every delegate (in
    the order in which the delegates were added), and finally call
    self.cleanup()."""
    self._end_commit()
    self._invoke_delegates('finish')
    self.cleanup()
3650     self.cleanup()
3651
3652
class SVNCommitItem:
  """Pairs a CVSRevision with the Subversion-related data (such as
  properties) that is attached to it."""

  def __init__(self, c_rev, svn_props_changed):
    """Wrap C_REV and work out the properties of the file.

    SVN_PROPS_CHANGED tells whether the svn: properties are known to
    have changed since the last revision.

    Every SVNPropertySetter in Ctx().svn_property_setters gets a
    chance to set properties on this item; afterwards a couple of
    the resulting properties are read back for our own use."""

    self.c_rev = c_rev

    # True if the svn properties of this file changed, i.e. if they
    # have to be written to the dumpfile.
    self.svn_props_changed = svn_props_changed

    # Map { key : value } of the properties of this item.  A VALUE of
    # None means that the property should not be set.
    self.svn_props = { }

    for setter in Ctx().svn_property_setters:
      setter.set_properties(self)

    eol_style = self.svn_props.get('svn:eol-style', None)
    keywords = self.svn_props.get('svn:keywords', None)

    # Whether the EOLs have to be filtered.  (self.svn_props could be
    # consulted directly instead, as it is set up for each revision.)
    self.needs_eol_filter = eol_style is not None

    self.has_keywords = keywords is not None
3684
3685
class SVNPropertySetter:
  """Interface for objects that are able to set properties on an
  SVNCommitItem.  This base class is abstract."""

  def set_properties(self, s_item):
    """Set whatever properties can be determined for S_ITEM.

    Must be overridden; this implementation always raises
    NotImplementedError."""

    raise NotImplementedError()
3693
3694
class SVNRepositoryMirrorDelegate:
  """Abstract base class of all SVNRepositoryMirror delegates.
  A subclass has to implement every method listed below.

  Each method stands for the Subversion operation that its name
  suggests, and each subclass carries that operation out in its own
  way.  Taking add_path as an example: the DumpfileDelegate writes a
  node record with "Node-action: add" to a Subversion dumpfile, the
  StdoutDelegate only reports that the path is being added, and the
  RepositoryDelegate causes the path to be added to the Subversion
  repository that it is creating.
  """

  def start_commit(self, svn_commit):
    """Do whatever is needed to start SVNCommit SVN_COMMIT; the
    subclass implementation has the details."""
    raise NotImplementedError()

  def mkdir(self, path):
    """PATH is a string; the subclass implementation has the
    details."""
    raise NotImplementedError()

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; the subclass implementation has
    the details."""
    raise NotImplementedError()

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; the subclass implementation has
    the details."""
    raise NotImplementedError()

  def delete_path(self, path):
    """PATH is a string; the subclass implementation has the
    details."""
    raise NotImplementedError()

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are strings and SRC_REVNUM is a
    subversion revision number (int); the subclass implementation
    has the details."""
    raise NotImplementedError()

  def finish(self):
    """Do any cleanup that is needed once all revisions have been
    committed."""
    raise NotImplementedError()
3742
3743
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
  """Delegate that records every repository action in a Subversion
  dumpfile."""

  def __init__(self, dumpfile_path=None):
    """Open the dumpfile DUMPFILE_PATH for writing (Ctx().dumpfile is
    used when DUMPFILE_PATH is None or empty) and write the dumpfile
    header to it."""
    self.dumpfile_path = dumpfile_path or Ctx().dumpfile

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)

  def _write_dumpfile_header(self, dumpfile):
    """Write the standard dumpfile header to the file-like object
    DUMPFILE."""
    # No UUID is written: the CVS repository does not have one, and
    # the Subversion repository will be created with one anyway.
    header = 'SVN-fs-dump-format-version: 2\n\n'
    dumpfile.write(header)

  def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8.

    Raise FatalError if a component of PATH cannot be converted."""
    # Every path component is converted on its own, because the
    # components may each use a different encoding.
    converted = []
    for piece in path.split('/'):
      try:
        # Log messages may be converted with the 'replace' strategy,
        # but no lossiness can be tolerated for paths.
        converted.append(to_utf8(piece, 'strict'))
      except UnicodeError:
        raise FatalError(
            "Unable to convert a path '%s' to internal encoding.\n"
            "Consider rerunning with (for example) '--encoding=latin1'."
            % (path,))
    return '/'.join(converted)

  def _string_for_prop(self, name, value):
    """Return the property NAME with value VALUE in the form that the
    dumpfile needs: a 'K <length>' line, the name, a 'V <length>'
    line and the value."""

    key_part = 'K %d\n%s\n' % (len(name), name)
    value_part = 'V %d\n%s\n' % (len(value), value)
    return key_part + value_part

  def _props_block(self, props):
    """Return the dumpfile property section for the map PROPS
    { name : value }: the properties sorted by name, skipping those
    whose value is None, terminated by a PROPS-END line."""
    names = props.keys()
    names.sort()
    chunks = []
    for name in names:
      value = props[name]
      if value is not None:
        chunks.append(self._string_for_prop(name, value))
    chunks.append('PROPS-END\n')
    return ''.join(chunks)

  def start_commit(self, svn_commit):
    """Emit the start of SVN_COMMIT (an SVNCommit)."""

    self.revision = svn_commit.revnum

    # A new commit typically starts like this:
    #
    #   Revision-number: 1
    #   Prop-content-length: 129
    #   Content-length: 129
    #
    #   K 7
    #   svn:log
    #   V 27
    #   Log message for revision 1.
    #   K 10
    #   svn:author
    #   V 7
    #   jrandom
    #   K 8
    #   svn:date
    #   V 27
    #   2003-04-22T22:57:58.132837Z
    #   PROPS-END
    #
    # The length headers count everything: the data itself, but also
    # the length lines, including their 'K ' or 'V ' prefixes.
    #
    # Both Prop-content-length and Content-length are present because
    # the former covers the props only while the latter covers
    # everything; that is the generic header form of any entity in a
    # dumpfile.  Revisions have nothing but props, so for them the two
    # lengths are always equal.

    # Build the property section (sorted by name, None values skipped).
    revprops_text = self._props_block(svn_commit.get_revprops())
    revprops_len = len(revprops_text)

    # Emit the revision header, followed by the props.
    self.dumpfile.write('Revision-number: %d\n'
                        'Prop-content-length: %d\n'
                        'Content-length: %d\n'
                        '\n'
                        % (self.revision, revprops_len, revprops_len))

    self.dumpfile.write(revprops_text)
    self.dumpfile.write('\n')

  def mkdir(self, path):
    """Emit the creation of directory PATH."""
    node_path = self._utf8_path(path)
    self.dumpfile.write("Node-path: %s\n"
                        "Node-kind: dir\n"
                        "Node-action: add\n"
                        "\n"
                        "\n" % node_path)

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.
    OP is either the constant OP_ADD or OP_CHANGE; any other value
    raises FatalError.  FatalError is also raised when the checkout
    command for the revision exits with a non-zero status."""

    # Map OP to the dumpfile action, rejecting anything unexpected.
    if op == OP_ADD:
      action = 'add'
    elif op == OP_CHANGE:
      action = 'change'
    else:
      raise FatalError("_add_or_change_path() called with bad op ('%s')"
                       % (op,))

    c_rev = s_item.c_rev

    # The property handling below relies on an undocumented (but
    # consistent) feature of the Subversion dumpfile loader: if a node
    # does not mention properties at all -- no "Prop-content-length:"
    # header, no property list, no "PROPS-END\n" line -- the node's
    # properties are left untouched.
    #
    # Text content behaves the same way.  A dumpfile record for a
    # change to *just* the properties of a node that already has text
    # from an earlier revision carries neither "Text-content-length:"
    # nor "Text-content-md5:" nor the text itself, and the text is not
    # erased by that; it simply stays as it was.
    #
    # This suits cvs2svn well, thanks to lucky coincidences:
    #
    # The only properties ever set on a file are set in its first
    # revision, and all later revisions (on branches too) inherit
    # them.  File properties never change after the first revision,
    # so the full property set of a file need not be remembered once
    # it has been written.
    #
    # The only property set on directories is "svn:ignore".  It may
    # change after the first revision, but always according to the
    # contents of a ".cvsignore" file.  CVS does the remembering in
    # that case, so the previous value need not be preserved here
    # either.

    # Work out the (sorted-by-name) property section and its header,
    # if the properties have to be written at all.
    if s_item.svn_props_changed:
      prop_contents = self._props_block(s_item.svn_props)
      props_header = 'Prop-content-length: %d\n' % len(prop_contents)
    else:
      prop_contents = ''
      props_header = ''

    # A .cvsignore file is additionally turned into the svn:ignore
    # property of the directory that contains it.
    dir_path, basename = os.path.split(c_rev.svn_path)
    if basename == ".cvsignore":
      ignore_value = '\n'.join(generate_ignores(c_rev))
      ignore_props = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
                      (len(ignore_value), ignore_value)) + 'PROPS-END\n'
      ignore_len = len(ignore_props)

      # Headers first, then the props.
      self.dumpfile.write('Node-path: %s\n'
                          'Node-kind: dir\n'
                          'Node-action: change\n'
                          'Prop-content-length: %d\n'
                          'Content-length: %d\n'
                          '\n'
                          '%s'
                          % (self._utf8_path(dir_path), ignore_len,
                             ignore_len, ignore_props))

    # Files with keywords must not have them expanded by CVS/RCS: they
    # have to be stored unexpanded in the repository, or Subversion
    # will get confused.
    pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
        c_rev, suppress_keyword_substitution=s_item.has_keywords)

    self.dumpfile.write('Node-path: %s\n'
                        'Node-kind: file\n'
                        'Node-action: %s\n'
                        '%s'  # no property header if no props
                        'Text-content-length: '
                        % (self._utf8_path(c_rev.svn_path),
                           action, props_header))

    # The text length and checksum are not known before the text has
    # been streamed, so fixed-width placeholders are written now and
    # overwritten afterwards.  POS marks the start of the first one.
    pos = self.dumpfile.tell()

    length_slot = '0000000000000000'
    md5_label = 'Text-content-md5: '
    md5_slot = '00000000000000000000000000000000'
    content_label = 'Content-length: '

    # Offsets, relative to POS, of the two later placeholders:
    # 16 zeros + 1 newline + len('Text-content-md5: ') == 35
    md5_offset = len(length_slot) + 1 + len(md5_label)
    # 35 + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
    content_offset = md5_offset + len(md5_slot) + 1 + len(content_label)

    self.dumpfile.write(length_slot + '\n'
                        + md5_label + md5_slot + '\n'
                        + content_label + length_slot + '\n'
                        + '\n')

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Put a filter in front of the pipe if all EOLs have to be
    # converted to LFs.
    data_reader = pipe.stdout
    if s_item.needs_eol_filter:
      data_reader = LF_EOL_Filter(pipe.stdout)

    # Copy the revision contents into the dumpfile, keeping track of
    # the length and the checksum on the way.
    checksum = md5.new()
    length = 0
    buf = data_reader.read(PIPE_READ_SIZE)
    while buf != '':
      checksum.update(buf)
      length = length + len(buf)
      self.dumpfile.write(buf)
      buf = data_reader.read(PIPE_READ_SIZE)

    pipe.stdout.close()
    error_output = pipe.stderr.read()
    exit_status = pipe.wait()
    if exit_status:
      raise FatalError("The command '%s' failed with exit status: %s\n"
                       "and the following output:\n"
                       "%s" % (pipe_cmd, exit_status, error_output))

    # Now go back and patch the placeholders.  The text length
    # replaces the first 16 zeros, padded on the left with spaces:
    self.dumpfile.seek(pos, 0)
    self.dumpfile.write('%16d' % length)
    self.dumpfile.seek(pos + md5_offset, 0)
    self.dumpfile.write(checksum.hexdigest())
    self.dumpfile.seek(pos + content_offset, 0)
    # The content length covers the property data, the text data and
    # any metadata around or inside them.
    self.dumpfile.write('%16d' % (length + len(prop_contents)))
    # Return to the end of the stream.
    self.dumpfile.seek(0, 2)

    # The record is complete.  Two newlines are written: one to
    # terminate contents that were not newline-terminated themselves,
    # one to leave a blank line for readability.
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_path(self, path):
    """Emit the deletion of PATH."""
    node_path = self._utf8_path(path)
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: delete\n'
                        '\n' % node_path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Emit the copying of SRC_PATH at SRC_REVNUM to DEST_PATH."""
    # "Node-kind:" is left out for copies; the loader ignores it
    # anyway and uses the kind of the source instead.
    copy_fields = (self._utf8_path(dest_path),
                   src_revnum,
                   self._utf8_path(src_path))
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: add\n'
                        'Node-copyfrom-rev: %d\n'
                        'Node-copyfrom-path: /%s\n'
                        '\n'
                        % copy_fields)

  def finish(self):
    """Close the dumpfile once all revisions have been committed."""
    self.dumpfile.close()
4037
4038
class RepositoryDelegate(DumpfileDelegate):
  """Creates a new Subversion Repository.  DumpfileDelegate does all
  of the heavy lifting: each commit is written to a temporary
  dumpfile, which is then fed to an 'svnadmin load' process."""
  def __init__(self):
    """Create the target repository (unless Ctx().existing_svnrepos
    is set), open the temporary dumpfile and start the 'svnadmin load'
    process that will receive the commits.

    Raise FatalError if the dumpfile header cannot be written to the
    loader process."""
    self.svnadmin = Ctx().svnadmin
    self.target = Ctx().target
    if not Ctx().existing_svnrepos:
      Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
      if not Ctx().fs_type:
        # User didn't say what kind repository (bdb, fsfs, etc).
        # We still pass --bdb-txn-nosync.  It's a no-op if the default
        # repository type doesn't support it, but we definitely want
        # it if BDB is the default.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--bdb-txn-nosync",
                                           self.target))
      elif Ctx().fs_type == 'bdb':
        # User explicitly specified bdb.
        #
        # Since this is a BDB repository, pass --bdb-txn-nosync,
        # because it gives us a 4-5x speed boost (if cvs2svn is
        # creating the repository, cvs2svn should be the only program
        # accessing the svn repository (until cvs is done, at least)).
        # But we'll turn no-sync off in self.finish(), unless
        # instructed otherwise.
        run_command('%s create %s %s "%s"' % (self.svnadmin,
                                              "--fs-type=bdb",
                                              "--bdb-txn-nosync",
                                              self.target))
      else:
        # User specified something other than bdb.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--fs-type=%s" % Ctx().fs_type,
                                           self.target))

    # Since the output of this run is a repository, not a dumpfile,
    # the temporary dumpfiles we create should go in the tmpdir.
    DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

    # This is 1 if a commit is in progress, otherwise None.
    self._commit_in_progress = None

    # The base constructor opened the dumpfile write-only and wrote a
    # header into it.  Close that handle before reopening the file for
    # reading and writing; otherwise the handle leaks, and its
    # buffered header could be flushed into the file after the reopen
    # below has truncated it.
    self.dumpfile.close()
    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
                                     self.target ], True)
    self.loader_pipe.stdout.close()
    try:
      # The header goes straight to the loader; the dumpfile itself
      # only ever holds one revision at a time.
      self._write_dumpfile_header(self.loader_pipe.stdin)
    except IOError:
      raise FatalError("svnadmin failed with the following output while "
                       "loading the dumpfile:\n"
                       + self.loader_pipe.stderr.read())

  def _feed_pipe(self):
    """Feed the revision stored in the dumpfile to the svnadmin
    load pipe.  Raise FatalError if writing to the pipe fails."""
    self.dumpfile.seek(0)
    while 1:
      data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
      if not len(data):
        break
      try:
        self.loader_pipe.stdin.write(data)
      except IOError:
        raise FatalError("svnadmin failed with the following output "
                         "while loading the dumpfile:\n"
                         + self.loader_pipe.stderr.read())

  def start_commit(self, svn_commit):
    """Start a new commit.  If a commit is already in progress, load
    the dumpfile into the svn repository first.  Then empty the
    dumpfile and write the start of SVN_COMMIT into it."""
    if self._commit_in_progress:
      self._feed_pipe()
    # Reuse the same file for every revision: rewind and truncate.
    self.dumpfile.seek(0)
    self.dumpfile.truncate()
    DumpfileDelegate.start_commit(self, svn_commit)
    self._commit_in_progress = 1

  def _disable_txn_nosync(self, db_config):
    """Comment out the DB_TXN_NOSYNC line in the file DB_CONFIG.

    If the file does not contain that exact line, it is left
    untouched and a warning is logged."""
    no_sync = 'set_flags DB_TXN_NOSYNC\n'

    config_file = open(db_config, 'r')
    try:
      contents = config_file.readlines()
    finally:
      config_file.close()

    if no_sync not in contents:
      # Nothing that we know how to comment out.  The repository has
      # been loaded successfully at this point, so don't fail.
      Log().write(LOG_WARN,
                  "%s: '%s' was not found in '%s'; the file was left "
                  "unchanged." % (warning_prefix, no_sync[:-1], db_config))
      return

    contents[contents.index(no_sync)] = '# ' + no_sync

    config_file = open(db_config, 'w')
    try:
      config_file.writelines(contents)
    finally:
      config_file.close()

  def finish(self):
    """Loads the last commit into the repository, waits for the
    loader to exit and removes the temporary dumpfile.

    Raise FatalError if 'svnadmin load' exits with a non-zero
    status."""
    self._feed_pipe()
    self.dumpfile.close()
    self.loader_pipe.stdin.close()
    error_output = self.loader_pipe.stderr.read()
    exit_status = self.loader_pipe.wait()
    if exit_status:
      raise FatalError('svnadmin load failed with exit status: %s\n'
                       'and the following output:\n'
                       '%s' % (exit_status, error_output,))
    os.remove(self.dumpfile_path)

    # If this is a BDB repository, and we created the repository, and
    # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
    # line in the DB_CONFIG file, because txn syncing should be on by
    # default in BDB repositories.
    #
    # We determine if this is a BDB repository by looking for the
    # DB_CONFIG file, which doesn't exist in FSFS, rather than by
    # checking Ctx().fs_type.  That way this code will Do The Right
    # Thing in all circumstances.
    db_config = os.path.join(self.target, "db/DB_CONFIG")
    if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
        and os.path.exists(db_config)):
      self._disable_txn_nosync(db_config)
4149
4150
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """Leaves the disk alone and merely reports, through Log(), what the
  SVNRepositoryMirror is doing.  The messages claim that something is
  being done, while all that really happens is that the claim gets
  printed.  Kind of zen, really."""
  def __init__(self, total_revs):
    """TOTAL_REVS is the number that start_commit() reports after
    the '/' in its progress message."""
    self.total_revs = total_revs

  def start_commit(self, svn_commit):
    """Report the Subversion revision number of the commit that is
    being started."""
    separator = "=" * 60
    Log().write(LOG_VERBOSE, separator)
    progress = "Starting Subversion r%d / %d" % (svn_commit.revnum,
                                                 self.total_revs)
    Log().write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Report that directory PATH is being created."""
    Log().write(LOG_VERBOSE, "  New Directory", path)

  def add_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being 'added'."""
    added_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, "  Adding", added_path)

  def change_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being 'changed'."""
    changed_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, "  Changing", changed_path)

  def delete_path(self, path):
    """Report that PATH is being 'deleted'."""
    Log().write(LOG_VERBOSE, "  Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Report, on two lines, that revision SRC_REVNUM of SRC_PATH is
    being 'copied' to DEST_PATH."""
    Log().write(LOG_VERBOSE, "  Copying revision", src_revnum, "of", src_path)
    Log().write(LOG_VERBOSE, "                to", dest_path)

  def finish(self):
    """Report that the creation of the repository is complete."""
    Log().write(LOG_VERBOSE, "Finished creating Subversion repository.")
    Log().write(LOG_QUIET, "Done.")
4193
# This should be a local to pass1,
# but Python 2.0 does not support nested scopes.
OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
def pass1():
  """Pass 1: parse every ',v' file found below Ctx().cvsroot.

  Each file is parsed with cvs2svn_rcsparse, using a CollectData
  instance as the sink; afterwards the symbol database is written.

  Raise FatalException if any file caused a fatal error (a file that
  exists both inside and outside of an Attic directory, or a file
  that is not a valid ,v file), or if no ',v' file was found at all."""
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    # os.path.walk() callback.  BATON is the CollectData instance; it
    # is passed explicitly because nested scopes are not available.
    cd = baton
    attic_len = len(OS_SEP_PLUS_ATTIC)
    for fname in files:
      if fname[-2:] != ',v':
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname[-attic_len:] == OS_SEP_PLUS_ATTIC:
        # drop the 'Attic' portion from the pathname for the canonical name.
        cd.set_fname(os.path.join(dirname[:-attic_len], fname), pathname)
      else:
        # If this file also exists in the attic, it's a fatal error
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        cd.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)
      # Close the RCS file explicitly instead of leaving it to the
      # garbage collector.  (try/except and try/finally are nested
      # because old Pythons cannot combine them in one statement.)
      rcs_file = None
      try:
        try:
          rcs_file = open(pathname, 'rb')
          cvs2svn_rcsparse.parse(rcs_file, cd)
        except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
                RuntimeError):
          # An unparsable file is recorded and reported at the end of
          # the pass, so that all bad files are listed in one run.
          err = "%s: '%s' is not a valid ,v file" \
                % (error_prefix, pathname)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        except:
          # Anything else is unexpected: say which file was being
          # parsed, then let the exception propagate.
          Log().write(LOG_WARN,
                      "Exception occurred while parsing %s" % pathname)
          raise
      finally:
        if rcs_file is not None:
          rcs_file.close()

  os.path.walk(Ctx().cvsroot, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if len(cd.fatal_errors) > 0:
    raise FatalException("Pass 1 complete.\n"
                         + "=" * 75 + "\n"
                         + "Error summary:\n"
                         + "\n".join(cd.fatal_errors) + "\n"
                         + "Exited due to fatal error(s).\n")

  if cd.found_valid_file is None:
    raise FatalException(
        "\n"
        "No RCS files found in your CVS Repository!\n"
        "Are you absolutely certain you are pointing cvs2svn\n"
        "at a CVS repository?\n"
        "\n"
        "Exited due to fatal error(s).\n")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
4258
4259 def pass2():
4260   "Pass 2: clean up the revision information."
4261
4262   symbol_db = SymbolDatabase()
4263   symbol_db.read()
4264
4265   # Convert the list of regexps to a list of strings
4266   excludes = symbol_db.find_excluded_symbols(Ctx().excludes)
4267
4268   error_detected = 0
4269
4270   Log().write(LOG_QUIET, "Checking for blocked exclusions...")
4271   blocked_excludes = symbol_db.find_blocked_excludes(excludes)
4272   if blocked_excludes:
4273     for branch, blockers in blocked_excludes.items():
4274       sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
4275                        "excluded because the following symbols depend "
4276                        "on it:\n" % (branch))
4277       for blocker in blockers:
4278         sys.stderr.write("    '%s'\n" % (blocker))
4279     sys.stderr.write("\n")
4280     error_detected = 1
4281
4282   Log().write(LOG_QUIET, "Checking for forced tags with commits...")
4283   invalid_forced_tags = [ ]
4284   for forced_tag in Ctx().forced_tags:
4285     if excludes.has_key(forced_tag):
4286       continue
4287     if symbol_db.branch_has_commit(forced_tag):
4288       invalid_forced_tags.append(forced_tag)
4289   if invalid_forced_tags:
4290     sys.stderr.write(error_prefix + ": The following branches cannot be "
4291                      "forced to be tags because they have commits:\n")
4292     for tag in invalid_forced_tags:
4293       sys.stderr.write("    '%s'\n" % (tag))
4294     sys.stderr.write("\n")
4295     error_detected = 1
4296
4297   Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
4298   mismatches = symbol_db.find_mismatches(excludes)
4299   def is_not_forced(mismatch):
4300     name = mismatch[0]
4301     return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
4302   mismatches = filter(is_not_forced, mismatches)
4303   if mismatches:
4304     sys.stderr.write(error_prefix + ": The following symbols are tags "
4305                      "in some files and branches in others.\nUse "
4306                      "--force-tag, --force-branch and/or --exclude to "
4307                      "resolve the symbols.\n")
4308     for name, tag_count, branch_count, commit_count in mismatches:
4309       sys.stderr.write("    '%s' is a tag in %d files, a branch in "
4310                        "%d files and has commits in %d files.\n"
4311                        % (name, tag_count, branch_count, commit_count))
4312     error_detected = 1
4313
4314   # Bail out now if we found errors
4315   if error_detected:
4316     sys.exit(1)
4317
4318   # Create the tags database
4319   tags_db = TagsDatabase(DB_OPEN_NEW)
4320   for tag in symbol_db.tags.keys():
4321     if tag not in Ctx().forced_branches:
4322       tags_db[tag] = None
4323   for tag in Ctx().forced_tags:
4324     tags_db[tag] = None
4325
4326   Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")
4327
4328   # We may have recorded some changes in revisions' timestamp.  We need to
4329   # scan for any other files which may have had the same log message and
4330   # occurred at "the same time" and change their timestamps, too.
4331
4332   # read the resync data file
4333   def read_resync(fname):
4334     "Read the .resync file into memory."
4335
4336     ### note that we assume that we can hold the entire resync file in
4337     ### memory. really large repositories with whacky timestamps could
4338     ### bust this assumption. should that ever happen, then it is possible
4339     ### to split the resync file into pieces and make multiple passes,
4340     ### using each piece.
4341
4342     #
4343     # A digest maps to a sequence of lists which specify a lower and upper
4344     # time bound for matching up the commit.  We keep a sequence of these
4345     # because a number of checkins with the same log message (e.g. an empty
4346     # log message) could need to be remapped.  We also make them a list
4347     # because we will dynamically expand the lower/upper bound as we find
4348     # commits that fall into a particular msg and time range.
4349     #
4350     # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
4351     #
4352     resync = { }
4353
4354     for line in fileinput.FileInput(fname):
4355       t1 = int(line[:8], 16)
4356       digest = line[9:DIGEST_END_IDX]
4357       t2 = int(line[DIGEST_END_IDX+1:], 16)
4358       t1_l = t1 - COMMIT_THRESHOLD/2
4359       t1_u = t1 + COMMIT_THRESHOLD/2
4360       resync.setdefault(digest, []).append([t1_l, t1_u, t2])
4361
4362     # For each digest, sort the resync items in it in increasing order,
4363     # based on the lower time bound.
4364     for val in resync.values():
4365       val.sort()
4366
4367     return resync
4368
4369   resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))
4370
4371   output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
4372   Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)
4373
4374   tweaked_timestamps_db = Database(temp(TWEAKED_TIMESTAMPS_DB), DB_OPEN_NEW)
4375   Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB), pass2)
4376
4377   # process the revisions file, looking for items to clean up
4378   for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
4379     c_rev = CVSRevision(Ctx(), line[:-1])
4380
4381     # Skip this entire revision if it's on an excluded branch
4382     if excludes.has_key(c_rev.branch_name):
4383       continue
4384
4385     new_prev_ts = None
4386     if c_rev.prev_rev is not None:
4387       new_prev_ts = tweaked_timestamps_db.get(
4388         c_rev.unique_key(c_rev.prev_rev), None)
4389     if new_prev_ts:
4390       c_rev.prev_timestamp = new_prev_ts
4391
4392     new_next_ts = None
4393     if c_rev.next_rev is not None:
4394       new_next_ts = tweaked_timestamps_db.get(
4395         c_rev.unique_key(c_rev.next_rev), None)
4396     if new_next_ts:
4397       c_rev.next_timestamp = new_next_ts
4398
4399     # Remove all references to excluded tags and branches
4400     def not_excluded(symbol, excludes=excludes):
4401       return not excludes.has_key(symbol)
4402     c_rev.branches = filter(not_excluded, c_rev.branches)
4403     c_rev.tags = filter(not_excluded, c_rev.tags)
4404
4405     # Convert all branches that are forced to be tags
4406     for forced_tag in Ctx().forced_tags:
4407       if forced_tag in c_rev.branches:
4408         c_rev.branches.remove(forced_tag)
4409         c_rev.tags.append(forced_tag)
4410
4411     # Convert all tags that are forced to be branches
4412     for forced_branch in Ctx().forced_branches:
4413       if forced_branch in c_rev.tags:
4414         c_rev.tags.remove(forced_branch)
4415         c_rev.branches.append(forced_branch)
4416
4417     # see if this is "near" any of the resync records we
4418     # have recorded for this digest [of the log message].
4419     for record in resync.get(c_rev.digest, []):
4420       if record[2] == c_rev.timestamp:
4421         # This means that either c_rev is the same revision that
4422         # caused the resync record to exist, or c_rev is a different
4423         # CVS revision that happens to have the same timestamp.  In
4424         # either case, we don't have to do anything, so we...
4425         continue
4426
4427       if record[0] <= c_rev.timestamp <= record[1]:
4428         # bingo!  We probably want to remap the time on this c_rev,
4429         # unless the remapping would be useless because the new time
4430         # would fall outside the COMMIT_THRESHOLD window for this
4431         # commit group.
4432         new_timestamp = record[2]
4433         # If the new timestamp is earlier than that of our previous revision
4434         if new_timestamp < c_rev.prev_timestamp:
4435           desc = ("%s: Attempt to set timestamp of revision %s on file %s"
4436                   + " to time %s, which is before previous the time of"
4437                   + " revision %s (%s):")
4438           Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
4439                                         c_rev.cvs_path, new_timestamp,
4440                                         c_rev.prev_rev, c_rev.prev_timestamp))
4441           # If resyncing our rev to c_rev.prev_timestamp + 1 will place
4442           # the timestamp of c_rev within COMMIT_THRESHOLD of the
4443           # attempted resync time, then sync back to c_rev.prev_timestamp
4444           # + 1...
4445           if ((c_rev.prev_timestamp + 1) - new_timestamp) < COMMIT_THRESHOLD:
4446             new_timestamp = c_rev.prev_timestamp + 1
4447             Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
4448                                                           new_timestamp))
4449           else:
4450             Log().write(LOG_WARN, "%s: Timestamp left untouched" %
4451                         warning_prefix)
4452             continue
4453
4454         # If the new timestamp is later than that of our next revision
4455         elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
4456           desc = ("%s: Attempt to set timestamp of revision %s on file %s"
4457                   + " to time %s, which is after time of next"
4458                   + " revision %s (%s):")
4459           Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
4460                                         c_rev.cvs_path, new_timestamp,
4461                                         c_rev.prev_rev, c_rev.next_timestamp))
4462           # If resyncing our rev to c_rev.next_timestamp - 1 will place
4463           # the timestamp of c_rev within COMMIT_THRESHOLD of the
4464           # attempted resync time, then sync forward to c_rev.next_timestamp
4465           # - 1...
4466           if (new_timestamp - (c_rev.next_timestamp - 1)) < COMMIT_THRESHOLD:
4467             new_timestamp = c_rev.next_timestamp - 1
4468             Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
4469                                                           new_timestamp))
4470           else:
4471             Log().write(LOG_WARN, "%s: Timestamp left untouched" %
4472                         warning_prefix)
4473             continue
4474
4475         # Fix for Issue #71: Avoid resyncing two consecutive revisions
4476         # to the same timestamp.
4477         elif (new_timestamp == c_rev.prev_timestamp
4478               or new_timestamp == c_rev.next_timestamp):
4479           continue
4480
4481         # adjust the time range. we want the COMMIT_THRESHOLD from the
4482         # bounds of the earlier/latest commit in this group.
4483         record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
4484         record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)
4485
4486         msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
4487               % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
4488                  new_timestamp - c_rev.timestamp)
4489         Log().write(LOG_VERBOSE, msg)
4490
4491         c_rev.timestamp = new_timestamp
4492         tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp
4493
4494         # stop looking for hits
4495         break
4496
4497     output.write(str(c_rev) + "\n")
4498   Log().write(LOG_QUIET, "Done")
4499
def pass3():
  """Sort the cleaned CVS revisions file into the sorted revisions file.

  Reads DATAFILE + CLEAN_REVS_SUFFIX and writes DATAFILE +
  SORTED_REVS_SUFFIX (both inside the temporary directory).  The sorted
  file is registered with Cleanup() under pass5; convert() runs the
  cleanup registered for a pass right after that pass has finished."""
  Log().write(LOG_QUIET, "Sorting CVS revisions...")

  clean_revs_path = temp(DATAFILE + CLEAN_REVS_SUFFIX)
  sorted_revs_path = temp(DATAFILE + SORTED_REVS_SUFFIX)

  sort_file(clean_revs_path, sorted_revs_path)
  Cleanup().register(sorted_revs_path, pass5)

  Log().write(LOG_QUIET, "Done")
4506
def pass4():
  """Load the sorted revisions into the CVSRevisionDatabase.

  Every line of the sorted revisions file is parsed into a CVSRevision,
  logged to the revision database and recorded with StatsKeeper().

  Unless this is a trunk-only conversion, each revision is also logged
  to a LastSymbolicNameDatabase, which keeps track of the last
  CVSRevision that is a source for each tag or branch.
  """
  Log().write(LOG_QUIET,
      "Copying CVS revision data from flat file to database...")
  revisions_db = CVSRevisionDatabase(DB_OPEN_NEW)

  if Ctx().trunk_only:
    # Stand-in object whose methods do nothing, so that the loop below
    # does not have to test Ctx().trunk_only for every revision.
    class DummyLSNDB:
      def log_revision(*args):
        pass
      def create_database(*args):
        pass
    symbols_db = DummyLSNDB()
  else:
    Log().write(LOG_QUIET,
        "Finding last CVS revisions for all symbolic names...")
    symbols_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for line in sorted_revs:
    # line[:-1] drops the trailing newline before parsing.
    revision = CVSRevision(Ctx(), line[:-1])
    revisions_db.log_revision(revision)
    symbols_db.log_revision(revision)
    StatsKeeper().record_c_rev(revision)

  symbols_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
4537
def pass5():
  """
  Feed the sorted CVSRevisions to a CVSRevisionAggregator, which builds
  the SVNCommit <-> CVSRevision mapping databases.  CVSCommit._commit
  also calls SymbolingsLogger to register CVSRevisions that represent an
  opening or closing for a path on a branch or tag.  See
  SymbolingsLogger for more details.

  In a trunk-only conversion, revisions that carry a branch name are
  skipped.  Afterwards the number of Subversion revisions
  (SVNCommit.revnum - 1) is stored in StatsKeeper().
  """
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for line in sorted_revs:
    revision = CVSRevision(Ctx(), line[:-1])
    # Only trunk revisions (no branch name) pass a trunk-only conversion.
    if not Ctx().trunk_only or revision.branch_name is None:
      aggregator.process_revision(revision)
  aggregator.flush()

  stats = StatsKeeper()
  stats.set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
4557
def pass6():
  """Sort the symbol openings/closings file.

  Nothing is sorted for a trunk-only conversion, although the progress
  messages are still written.  The sorted file is registered with
  Cleanup() under pass8."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  if not Ctx().trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(sorted_path, pass8)

  Log().write(LOG_QUIET, "Done")
4566
def pass7():
  """Record, for every symbolic name, where its entries start in the
  sorted openings/closings file.

  Nothing is generated for a trunk-only conversion."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    # Named 'symbolings' rather than 'file' so that the builtin is not
    # shadowed.
    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      # An explicit tell()/readline() loop is needed (instead of
      # iterating over the file) because we need the offset of the
      # start of every line.
      while 1:
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only the symbol name is used; unpacking all three fields also
        # makes a malformed line fail loudly.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      # Always release the file handle, even if a line is malformed or
      # the database write fails.
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
4600
def pass8():
  """Replay the stored SVNCommits into the SVNRepositoryMirror.

  Commits are fetched from the PersistenceManager by revision number,
  starting at 2 (repository initialization is 1), until a lookup
  returns a false value.  The output delegate is a RepositoryDelegate
  when a target repository was given and a DumpfileDelegate otherwise;
  with --dry-run no output delegate is attached.  A StdoutDelegate is
  always attached."""
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  # Choose the output delegate and the matching progress message.
  if Ctx().target:
    delegate_class = RepositoryDelegate
    start_message = "Starting Subversion Repository."
  else:
    delegate_class = DumpfileDelegate
    start_message = "Starting Subversion Dumpfile."

  if not Ctx().dry_run:
    repos.add_delegate(delegate_class())
  Log().write(LOG_QUIET, start_message)

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  revnum = 2 # Repository initialization is 1.
  svn_commit = persistence_manager.get_svn_commit(revnum)
  while svn_commit:
    repos.commit(svn_commit)
    revnum += 1
    svn_commit = persistence_manager.get_svn_commit(revnum)

  repos.finish()
4625
# The conversion passes in execution order.  Pass number N (1-based, as
# given to the -p option) is _passes[N - 1].
_passes = [
  pass1, pass2, pass3, pass4,
  pass5, pass6, pass7, pass8,
  ]
4636
4637
class Ctx:
  """Session state for this run of cvs2svn.  For example, run-time
  options are stored here.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.

  Every instance shares one attribute dictionary, so an attribute set
  through any instance is visible through all of them.  The defaults
  below are installed only while that dictionary is still empty.
  """
  __shared_state = { }

  def __init__(self):
    state = self.__shared_state
    self.__dict__ = state
    if state:
      # Already initialized through an earlier instance.
      return

    # Input and output locations.
    self.cvsroot = None
    self.target = None
    self.dumpfile = DUMPFILE
    self.tmpdir = '.'
    self.existing_svnrepos = 0
    self.svnadmin = "svnadmin"
    self.bdb_txn_nosync = 0
    self.fs_type = None

    # Logging and run mode.
    self.verbose = 0
    self.quiet = 0
    self.print_help = 0
    self.dump_only = 0
    self.dry_run = 0
    self.skip_cleanup = 0
    self.use_cvs = None
    self.username = None

    # Repository layout.
    self.prune = 1
    self.trunk_only = 0
    self.trunk_base = "trunk"
    self.tags_base = "tags"
    self.branches_base = "branches"

    # Text handling and Subversion properties.
    self.encoding = ["ascii"]
    self.mime_types_file = None
    self.no_default_eol = 0
    self.eol_from_mime_type = 0
    self.keywords_off = 0
    self.svn_property_setters = []

    # Symbol (tag/branch) handling.
    self.forced_branches = []
    self.forced_tags = []
    self.excludes = []
    self.symbol_transforms = []
4680
4681
class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the CVS revision number in the cvs2svn:cvs-rev property."""

  def set_properties(self, s_item):
    """Store S_ITEM's CVS revision number in its properties and flag
    the properties as changed."""
    revision_number = s_item.c_rev.rev
    s_item.svn_props['cvs2svn:cvs-rev'] = revision_number
    s_item.svn_props_changed = True
4688
4689
class MimeMapper(SVNPropertySetter):
  """A class that provides mappings from file names to MIME types.

  The mappings are read from an apache-style mime.types file and kept
  in self.mappings, a dictionary from file extension (without the dot)
  to MIME type."""

  def __init__(self, mime_types_file):
    """Read the mappings from MIME_TYPES_FILE.

    Blank lines, comment lines and types listed without any extension
    are ignored.  If an extension is listed for two different types, a
    warning is written to stderr and the later type wins."""
    self.mappings = { }

    # Use a private FileInput instance, like the rest of this file,
    # instead of fileinput.input(): the latter works on module-global
    # state and refuses to start while another input() is active.
    for line in fileinput.FileInput(mime_types_file):
      # format of a line is something like
      # text/plain c h cpp
      fields = line.split()
      if len(fields) < 2:
        continue
      # Checking the first field (rather than the raw line) also skips
      # comment lines that are indented.
      if fields[0].startswith("#"):
        continue

      mime_type = fields[0]
      for ext in fields[1:]:
        if self.mappings.has_key(ext) and self.mappings[ext] != mime_type:
          sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                           % (warning_prefix, ext, self.mappings[ext],
                              mime_type))
        self.mappings[ext] = mime_type

  def set_properties(self, s_item):
    """Set svn:mime-type on S_ITEM if its file name has a known mapping."""
    basename, extension = os.path.splitext(
        os.path.basename(s_item.c_rev.cvs_path)
        )

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename

    mime_type = self.mappings.get(extension, None)
    if mime_type is not None:
      s_item.svn_props['svn:mime-type'] = mime_type
4730
4731
class BinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """Give binary files a fallback mime type when none has been set."""

  def set_properties(self, s_item):
    """Set svn:mime-type to application/octet-stream if S_ITEM has no
    svn:mime-type entry yet and its CVS mode is 'b'."""
    props = s_item.svn_props
    if props.has_key('svn:mime-type'):
      return
    if s_item.c_rev.mode == 'b':
      props['svn:mime-type'] = 'application/octet-stream'
4739
4740
class BinaryFileEOLStyleSetter(SVNPropertySetter):
  """Force the eol-style of binary files to None."""

  def set_properties(self, s_item):
    """Store None under svn:eol-style if S_ITEM's CVS mode is 'b'.

    The key is present afterwards, so setters that test for it with
    has_key() treat the eol-style as already decided."""
    is_binary = (s_item.c_rev.mode == 'b')
    if is_binary:
      s_item.svn_props['svn:eol-style'] = None
4747
4748
class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
  """Derive the eol-style from the mime type, unless it is already known.

  The mime type must have been set before this setter runs, because
  the decision is based on it.  See also issue #39."""

  def set_properties(self, s_item):
    """Set svn:eol-style to 'native' for text/* mime types and to None
    for any other known mime type.  Do nothing if svn:eol-style is
    already present or no mime type is known."""
    props = s_item.svn_props
    if props.has_key('svn:eol-style'):
      return

    mime_type = props.get('svn:mime-type', None)
    if mime_type is None:
      return

    if mime_type.startswith("text/"):
      props['svn:eol-style'] = 'native'
    else:
      props['svn:eol-style'] = None
4762
4763
class DefaultEOLStyleSetter(SVNPropertySetter):
  """Apply a default eol-style to items that do not have one yet."""

  def __init__(self, value):
    """Remember VALUE as the default eol-style."""

    self.value = value

  def set_properties(self, s_item):
    """Set svn:eol-style to the default unless the key already exists."""
    props = s_item.svn_props
    if props.has_key('svn:eol-style'):
      return
    props['svn:eol-style'] = self.value
4775
4776
class KeywordsPropertySetter(SVNPropertySetter):
  """Set svn:keywords depending on the CVS mode of the file.  See
  issue #2."""

  def __init__(self, value):
    """Remember VALUE, the value to store in svn:keywords whenever the
    property gets set."""

    self.value = value

  def set_properties(self, s_item):
    """Set svn:keywords if it is not present yet and the CVS mode is
    None, 'kv' or 'kvl'."""
    props = s_item.svn_props
    if props.has_key('svn:keywords'):
      return
    if s_item.c_rev.mode in (None, 'kv', 'kvl'):
      props['svn:keywords'] = self.value
4791
4792
class ExecutablePropertySetter(SVNPropertySetter):
  """Set svn:executable according to c_rev.file_executable."""

  def set_properties(self, s_item):
    """Set svn:executable to '*' if the CVS file is flagged executable."""
    c_rev = s_item.c_rev
    if c_rev.file_executable:
      s_item.svn_props['svn:executable'] = '*'
4799
4800
def convert(start_pass, end_pass):
  """Convert a CVS repository to an SVN repository.

  Run passes START_PASS through END_PASS (1-based, inclusive) from
  _passes.  After each pass its duration is logged with StatsKeeper(),
  pass-local attributes are removed from Ctx(), and, unless
  Ctx().skip_cleanup is set, the cleanup registered for that pass is
  run.  The statistics and timings are written to the log at the end."""

  cleanup = Cleanup()
  pass_started = time.time()
  StatsKeeper().set_start_time(time.time())

  for index in range(start_pass - 1, end_pass):
    pass_number = index + 1
    Log().write(LOG_QUIET, '----- pass %d -----' % pass_number)
    run_pass = _passes[index]
    run_pass()

    # The end of this pass is the start of the next one.
    pass_finished = time.time()
    StatsKeeper().log_duration_for_pass(pass_finished - pass_started,
                                        pass_number)
    pass_started = pass_finished

    # Dispose of items in Ctx() not intended to live past the end of the
    # pass (identified by exactly one leading underscore; name-mangled
    # private members of Ctx itself are left alone).
    ctx = Ctx()
    for attr in dir(ctx):
      if len(attr) <= 2:
        continue
      if (attr.startswith('_') and not attr.startswith('__')
          and not attr.startswith("_Ctx__")):
        delattr(ctx, attr)

    if not Ctx().skip_cleanup:
      cleanup.cleanup(run_pass)
    StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET,
                '(These are unaltered CVS repository stats and do not\n'
                ' reflect tags or branches excluded via --exclude)\n')
  Log().write(LOG_NORMAL, StatsKeeper().timings())
4829
4830
def normalize_ttb_path(opt, path):
  """Normalize a path to be used for --trunk, --tags, or --branches.

  PATH is split on '/' and its components are rejoined with
  _path_join(); this is intended to strip leading, trailing, and
  duplicated '/' (presumably _path_join() drops empty components).

  Return the normalized path.

  Raise FatalError, naming the option OPT, if the normalized path is
  empty."""

  components = path.split('/')
  norm_path = _path_join(*components)
  if norm_path:
    return norm_path
  raise FatalError("cannot pass an empty path to %s." % (opt,))
4845
4846
def verify_paths_disjoint(*paths):
  """Verify that all of the paths in the argument list are disjoint.

  Raise FatalError if any of the paths is nested in another one (i.e.,
  in the sense that 'a/b/c/d' is nested in 'a/b'), or if any two paths
  are identical.  Return None otherwise."""

  keyed = []
  for path in paths:
    keyed.append((path.split('/'), path))

  # Component lists compare element by element, and a list that is a
  # prefix of another sorts directly before it.  So after sorting, any
  # nesting shows up in at least one adjacent pair, outer path first.
  keyed.sort()

  for (outer_parts, outer), (inner_parts, inner) in zip(keyed[:-1],
                                                        keyed[1:]):
    # The slice can only equal outer_parts if inner_parts is at least
    # as long, so no separate length test is needed.
    if inner_parts[:len(outer_parts)] == outer_parts:
      raise FatalError("paths %s and %s are not disjoint." % (outer, inner,))
4866
4867
def usage():
  """Write the command-line help text to standard output."""
  program = os.path.basename(sys.argv[0])
  lines = [
    'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path'
      % program,
    '  --help, -h           print this usage message and exit with success',
    '  --version            print the version number',
    '  -q                   quiet',
    '  -v                   verbose',
    '  -s PATH              path for SVN repos',
    '  -p START[:END]       start at pass START, end at pass END of %d'
      % len(_passes),
    '                       If only START is given, run only pass START',
    '                       (implicitly enables --skip-cleanup)',
    '  --existing-svnrepos  load into existing SVN repository',
    '  --dumpfile=PATH      name of intermediate svn dumpfile',
    '  --tmpdir=PATH        directory to use for tmp data (default to cwd)',
    '  --profile            profile with \'hotshot\' (into file cvs2svn.hotshot)',
    '  --dry-run            do not create a repository or a dumpfile;',
    '                       just print what would happen.',
    '  --use-cvs            use CVS instead of RCS \'co\' to extract data',
    '                       (only use this if having problems with RCS)',
    '  --svnadmin=PATH      path to the svnadmin program',
    '  --trunk-only         convert only trunk commits, not tags nor branches',
    '  --trunk=PATH         path for trunk (default: %s)'
      % Ctx().trunk_base,
    '  --branches=PATH      path for branches (default: %s)'
      % Ctx().branches_base,
    '  --tags=PATH          path for tags (default: %s)'
      % Ctx().tags_base,
    '  --no-prune           don\'t prune empty directories',
    '  --dump-only          just produce a dumpfile, don\'t commit to a repos',
    '  --encoding=ENC       encoding of paths and log messages in CVS repos',
    '                       Multiple of these options may be passed, where they',
    '                       will be treated as an ordered list of encodings to',
    '                       attempt (with "ascii" as a hardcoded last resort)',
    '  --force-branch=NAME  force NAME to be a branch',
    '  --force-tag=NAME     force NAME to be a tag',
    '  --exclude=REGEXP     exclude branches and tags matching REGEXP',
    '  --symbol-transform=P:S transform symbol names from P to S where P and S',
    '                       use Python regexp and reference syntax respectively',
    '  --username=NAME      username for cvs2svn-synthesized commits',
    '  --skip-cleanup       prevent the deletion of intermediate files',
    '  --bdb-txn-nosync     pass --bdb-txn-nosync to "svnadmin create"',
    '  --fs-type=TYPE       pass --fs-type=TYPE to "svnadmin create"',
    '  --cvs-revnums        record CVS revision numbers as file properties',
    '  --mime-types=FILE    specify an apache-style mime.types file for',
    '                       setting svn:mime-type',
    '  --eol-from-mime-type set svn:eol-style from mime type if known',
    '  --no-default-eol     don\'t set svn:eol-style to \'native\' for',
    '                       non-binary files with undetermined mime types',
    '  --keywords-off       don\'t set svn:keywords on any files (by default,',
    '                       cvs2svn sets svn:keywords on non-binary files to',
    '                       "%s")' % SVN_KEYWORDS_VALUE,
    ]
  # One line of help per entry, each terminated by a newline.
  sys.stdout.write('\n'.join(lines) + '\n')
4920
4921 def main():
4922   # Convenience var, so we don't have to keep instantiating this Borg.
4923   ctx = Ctx()
4924
4925   profiling = None
4926   start_pass = 1
4927   end_pass = len(_passes)
4928
4929   try:
4930     opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
4931                                [ "help", "create", "trunk=",
4932                                  "username=", "existing-svnrepos",
4933                                  "branches=", "tags=", "encoding=",
4934                                  "force-branch=", "force-tag=", "exclude=",
4935                                  "use-cvs", "mime-types=",
4936                                  "eol-from-mime-type", "no-default-eol",
4937                                  "trunk-only", "no-prune", "dry-run",
4938                                  "dump-only", "dumpfile=", "tmpdir=",
4939                                  "svnadmin=", "skip-cleanup", "cvs-revnums",
4940                                  "bdb-txn-nosync", "fs-type=",
4941                                  "version", "profile",
4942                                  "keywords-off", "symbol-transform="])
4943   except getopt.GetoptError, e:
4944     sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
4945     usage()
4946     sys.exit(1)
4947
4948   for opt, value in opts:
4949     if opt == '--version':
4950         print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
4951         sys.exit(0)
4952     elif opt == '-p':
4953       # Don't cleanup if we're doing incrementals.
4954       ctx.skip_cleanup = 1
4955       if value.find(':') > 0:
4956         start_pass, end_pass = map(int, value.split(':'))
4957       else:
4958         end_pass = start_pass = int(value)
4959       if start_pass > len(_passes) or start_pass < 1:
4960         raise FatalError(
4961             'illegal value (%d) for starting pass.  Must be 1 through %d.'
4962             % (int(start_pass), len(_passes),))
4963       if end_pass < start_pass or end_pass > len(_passes):
4964         raise FatalError(
4965             'illegal value (%d) for ending pass.  Must be %d through %d.'
4966             % (int(end_pass), int(start_pass), len(_passes),))
4967     elif (opt == '--help') or (opt == '-h'):
4968       ctx.print_help = 1
4969     elif opt == '-v':
4970       Log().log_level = LOG_VERBOSE
4971       ctx.verbose = 1
4972     elif opt == '-q':
4973       Log().log_level = LOG_QUIET
4974       ctx.quiet = 1
4975     elif opt == '-s':
4976       ctx.target = value
4977     elif opt == '--existing-svnrepos':
4978       ctx.existing_svnrepos = 1
4979     elif opt == '--dumpfile':
4980       ctx.dumpfile = value
4981     elif opt == '--tmpdir':
4982       ctx.tmpdir = value
4983     elif opt == '--use-cvs':
4984       ctx.use_cvs = 1
4985     elif opt == '--svnadmin':
4986       ctx.svnadmin = value
4987     elif opt == '--trunk-only':
4988       ctx.trunk_only = 1
4989     elif opt == '--trunk':
4990       ctx.trunk_base = normalize_ttb_path(opt, value)
4991     elif opt == '--branches':
4992       ctx.branches_base = normalize_ttb_path(opt, value)
4993     elif opt == '--tags':
4994       ctx.tags_base = normalize_ttb_path(opt, value)
4995     elif opt == '--no-prune':
4996       ctx.prune = None
4997     elif opt == '--dump-only':
4998       ctx.dump_only = 1
4999     elif opt == '--dry-run':
5000       ctx.dry_run = 1
5001     elif opt == '--encoding':
5002       ctx.encoding.insert(-1, value)
5003     elif opt == '--force-branch':
5004       ctx.forced_branches.append(value)
5005     elif opt == '--force-tag':
5006       ctx.forced_tags.append(value)
5007     elif opt == '--exclude':
5008       try:
5009         ctx.excludes.append(re.compile('^' + value + '$'))
5010       except re.error, e:
5011         raise FatalError("'%s' is not a valid regexp." % (value,))
5012     elif opt == '--mime-types':
5013       ctx.mime_types_file = value
5014     elif opt == '--eol-from-mime-type':
5015       ctx.eol_from_mime_type = 1
5016     elif opt == '--no-default-eol':
5017       ctx.no_default_eol = 1
5018     elif opt == '--keywords-off':
5019       ctx.keywords_off = 1
5020     elif opt == '--username':
5021       ctx.username = value
5022     elif opt == '--skip-cleanup':
5023       ctx.skip_cleanup = 1
5024     elif opt == '--cvs-revnums':
5025       ctx.svn_property_setters.append(CVSRevisionNumberSetter())
5026     elif opt == '--bdb-txn-nosync':
5027       ctx.bdb_txn_nosync = 1
5028     elif opt == '--fs-type':
5029       ctx.fs_type = value
5030     elif opt == '--create':
5031       sys.stderr.write(warning_prefix +
5032           ': The behaviour produced by the --create option is now the '
5033           'default,\nand passing the option is deprecated.\n')
5034     elif opt == '--profile':
5035       profiling = 1
5036     elif opt == '--symbol-transform':
5037       [pattern, replacement] = value.split(":")
5038       try:
5039         pattern = re.compile(pattern)
5040       except re.error, e:
5041         raise FatalError("'%s' is not a valid regexp." % (pattern,))
5042       ctx.symbol_transforms.append((pattern, replacement,))
5043
5044   if ctx.print_help:
5045     usage()
5046     sys.exit(0)
5047
5048   # Consistency check for options and arguments.
5049   if len(args) == 0:
5050     usage()
5051     sys.exit(1)
5052
5053   if len(args) > 1:
5054     sys.stderr.write(error_prefix +
5055                      ": must pass only one CVS repository.\n")
5056     usage()
5057     sys.exit(1)
5058
5059   ctx.cvsroot = args[0]
5060
5061   if ctx.use_cvs:
5062     ctx.cvs_repository = CVSRepositoryViaCVS(ctx.cvsroot)
5063   else:
5064     ctx.cvs_repository = CVSRepositoryViaRCS(ctx.cvsroot)
5065
5066   if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
5067     raise FatalError("must pass one of '-s' or '--dump-only'.")
5068
5069   def not_both(opt1val, opt1name, opt2val, opt2name):
5070     if opt1val and opt2val:
5071       raise FatalError("cannot pass both '%s' and '%s'."
5072                        % (opt1name, opt2name,))
5073
5074   not_both(ctx.target, '-s',
5075            ctx.dump_only, '--dump-only')
5076
5077   not_both(ctx.dump_only, '--dump-only',
5078            ctx.existing_svnrepos, '--existing-svnrepos')
5079
5080   not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
5081            ctx.existing_svnrepos, '--existing-svnrepos')
5082
5083   not_both(ctx.dump_only, '--dump-only',
5084            ctx.bdb_txn_nosync, '--bdb-txn-nosync')
5085
5086   not_both(ctx.quiet, '-q',
5087            ctx.verbose, '-v')
5088
5089   not_both(ctx.fs_type, '--fs-type',
5090            ctx.existing_svnrepos, '--existing-svnrepos')
5091
5092   if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
5093     raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
5094                      % ctx.fs_type)
5095
5096   # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
5097   ctx.project = Project(ctx.cvsroot,
5098                         ctx.trunk_base, ctx.branches_base, ctx.tags_base)
5099
5100   if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
5101     raise FatalError("the svn-repos-path '%s' is not an "
5102                      "existing directory." % ctx.target)
5103
5104   if not ctx.dump_only and not ctx.existing_svnrepos \
5105      and (not ctx.dry_run) and os.path.exists(ctx.target):
5106     raise FatalError("the svn-repos-path '%s' exists.\n"
5107                      "Remove it, or pass '--existing-svnrepos'."
5108                      % ctx.target)
5109
5110   if ctx.target and not ctx.dry_run:
5111     # Verify that svnadmin can be executed.  The 'help' subcommand
5112     # should be harmless.
5113     try:
5114       check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
5115     except CommandFailedException, e:
5116       raise FatalError(
5117           '%s\n'
5118           'svnadmin could not be executed.  Please ensure that it is\n'
5119           'installed and/or use the --svnadmin option.' % (e,))
5120
5121   if ctx.mime_types_file:
5122     ctx.svn_property_setters.append(MimeMapper(ctx.mime_types_file))
5123
5124   ctx.svn_property_setters.append(BinaryFileDefaultMimeTypeSetter())
5125   ctx.svn_property_setters.append(BinaryFileEOLStyleSetter())
5126
5127   if ctx.eol_from_mime_type:
5128     ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())
5129
5130   if ctx.no_default_eol:
5131     ctx.svn_property_setters.append(DefaultEOLStyleSetter(None))
5132   else:
5133     ctx.svn_property_setters.append(DefaultEOLStyleSetter('native'))
5134
5135   if not ctx.keywords_off:
5136     ctx.svn_property_setters.append(
5137         KeywordsPropertySetter(SVN_KEYWORDS_VALUE))
5138
5139   ctx.svn_property_setters.append(ExecutablePropertySetter())
5140
  # Make sure the tmp directory exists.  Note that we don't check if
  # it's empty -- we want to be able to use, for example, "." to hold
  # tempfiles.  But if we *did* want to check if it were empty, we'd do
  # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
5145   if not os.path.exists(ctx.tmpdir):
5146     os.mkdir(ctx.tmpdir)
5147   elif not os.path.isdir(ctx.tmpdir):
5148     raise FatalError(
5149         "cvs2svn tried to use '%s' for temporary files, but that path\n"
5150         "  exists and is not a directory.  Please make it be a directory,\n"
5151         "  or specify some other directory for temporary files."
5152         % (ctx.tmpdir,))
5153
5154   # But do lock the tmpdir, to avoid process clash.
5155   try:
5156     os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5157   except OSError, e:
5158     if e.errno == errno.EACCES:
5159       raise FatalError("Permission denied:"
5160                        + " No write access to directory '%s'." % ctx.tmpdir)
5161     if e.errno == errno.EEXIST:
5162       raise FatalError(
5163           "cvs2svn is using directory '%s' for temporary files, but\n"
5164           "  subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
5165           "  cvs2svn process is currently using '%s' as its temporary\n"
5166           "  workspace.  If you are certain that is not the case,\n"
5167           "  then remove the '%s/cvs2svn.lock' subdirectory."
5168           % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
5169     raise
5170   try:
5171     if profiling:
5172       import hotshot
5173       prof = hotshot.Profile('cvs2svn.hotshot')
5174       prof.runcall(convert, start_pass, end_pass)
5175       prof.close()
5176     else:
5177       convert(start_pass, end_pass)
5178   finally:
5179     try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5180     except: pass
5181
5182
5183 if __name__ == '__main__':
5184   try:
5185     main()
5186   except FatalException, e:
5187     sys.stderr.write(str(e))
5188     sys.exit(1)
5189
5190