cvs2svn.py

   1 #!/usr/bin/env python
   2 #
   3 # cvs2svn: ...
   4 #
   5 # $LastChangedRevision$
   6 #
   7 # ====================================================================
   8 # Copyright (c) 2000-2004 CollabNet.  All rights reserved.
   9 #
  10 # This software is licensed as described in the file COPYING, which
  11 # you should have received as part of this distribution.  The terms
  12 # are also available at http://subversion.tigris.org/license-1.html.
  13 # If newer versions of this license are posted there, you may use a
  14 # newer version instead, at your option.
  15 #
  16 # This software consists of voluntary contributions made by many
  17 # individuals.  For exact contribution history, see the revision
  18 # history and logs, available at http://cvs2svn.tigris.org/.
  19 # ====================================================================
  20
  21 import rcsparse
  22 import os
  23 import sys
  24 import sha
  25 import re
  26 import time
  27 import fileinput
  28 import string
  29 import getopt
  30 import stat
  31 import string
  32 import md5
  33 import anydbm
  34 import marshal
  35
  36 # Warnings and errors start with these strings.  They are typically
  37 # followed by a colon and a space, as in "%s: " ==> "Warning: ".
  38 warning_prefix = "Warning"
  39 error_prefix = "Error"
  40
  41 # Make sure this Python is recent enough.
  42 if sys.hexversion < 0x2000000:
  43   sys.stderr.write("'%s: Python 2.0 or higher required, "
  44                    "see www.python.org.\n" % error_prefix)
  45   sys.exit(1)
  46
  47 # Don't settle for less.
  48 if (anydbm._defaultmod.__name__ == 'dumbdbm'
  49     or anydbm._defaultmod.__name__ == 'dbm'):
  50   print 'ERROR: your installation of Python does not contain a suitable'
  51   print '  DBM module. This script cannot continue.'
  52   print '  to solve: see http://python.org/doc/current/lib/module-anydbm.html'
  53   print '  for details.'
  54   sys.exit(1)
  55
  56 if hasattr(anydbm._defaultmod, 'bsddb') \
  57     and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  58   try:
  59     gdbm = __import__('gdbm')
  60   except ImportError:
  61     sys.stderr.write(warning_prefix +
  62         ': The version of the bsddb module found '
  63         'on your computer has been reported to malfunction on some datasets, '
  64         'causing KeyError exceptions. You may wish to upgrade your Python to '
  65         'version 2.3 or later.\n')
  66   else:
  67     anydbm._defaultmod = gdbm
  68
  69 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
  70 branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
  71 vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$')
  72
  73 # This really only matches standard '1.1.1.*'-style vendor revisions.
  74 # One could conceivably have a file whose default branch is 1.1.3 or
  75 # whatever, or was that at some point in time, with vendor revisions
  76 # 1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now (which
  77 # is the only time this regexp gets used), we'd have no basis for
  78 # assuming that the non-standard vendor branch had ever been the
  79 # default branch anyway, so we don't want this to match them anyway.
  80 vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9])+$')
  81
  82 DATAFILE = 'cvs2svn-data'
  83 DUMPFILE = 'cvs2svn-dump'  # The "dumpfile" we create to load into the repos
  84
  85 # Skeleton version of an svn filesystem.
  86 # See class RepositoryMirror for how these work.
  87 SVN_REVISIONS_DB = 'cvs2svn-revisions.db'
  88 NODES_DB = 'cvs2svn-nodes.db'
  89
  90 # os.popen() on Windows seems to require an access-mode string of 'rb'
  91 # in cases where the process will output binary information to stdout.
  92 # Without the 'b' we get IOErrors upon closing the pipe.  Unfortunately
  93 # 'rb' isn't accepted in the Linux version of os.popen().  As a purely
  94 # practical matter, we compensate by switching on os.name.
  95 if os.name == 'nt':
  96   PIPE_READ_MODE = 'rb'
  97   PIPE_WRITE_MODE = 'wb'
  98 else:
  99   PIPE_READ_MODE = 'r'
 100   PIPE_WRITE_MODE = 'w'
 101
 102 # Record the default RCS branches, if any, for CVS filepaths.
 103 #
 104 # The keys are CVS filepaths, relative to the top of the repository
 105 # and with the ",v" stripped off, so they match the cvs paths used in
 106 # Commit.commit().  The values are vendor branch revisions, such as
 107 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'.  The vendor branch revision
 108 # represents the highest vendor branch revision thought to have ever
 109 # been head of the default branch.
 110 #
 111 # The reason we record a specific vendor revision, rather than a
 112 # default branch number, is that there are two cases to handle:
 113 #
 114 # One case is simple.  The RCS file lists a default branch explicitly
 115 # in its header, such as '1.1.1'.  In this case, we know that every
 116 # revision on the vendor branch is to be treated as head of trunk at
 117 # that point in time.
 118 #
 119 # But there's also a degenerate case.  The RCS file does not currently
 120 # have a default branch, yet we can deduce that for some period in the
 121 # past it probably *did* have one.  For example, the file has vendor
 122 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
 123 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2.  In this
 124 # case, we should record 1.1.1.96 as the last vendor revision to have
 125 # been the head of the default branch.
 126 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
 127
 128 # Records the origin ranges for branches and tags.
 129 # See class RepositoryMirror for how this works.
 130 SYMBOLIC_NAME_ROOTS_DB = 'cvs2svn-symroots.db'
 131
 132 # See class SymbolicNameTracker for details.
 133 SYMBOLIC_NAMES_DB = "cvs2svn-sym-names.db"
 134
 135 # Records the author and log message for each changeset.
 136 # The keys are author+log digests, the same kind used to identify
 137 # unique revisions in the .revs, etc files.  Each value is a tuple
 138 # of two elements: '(author logmessage)'.
 139 METADATA_DB = "cvs2svn-metadata.db"
 140
 141 REVS_SUFFIX = '.revs'
 142 CLEAN_REVS_SUFFIX = '.c-revs'
 143 SORTED_REVS_SUFFIX = '.s-revs'
 144 RESYNC_SUFFIX = '.resync'
 145
 146 ATTIC = os.sep + 'Attic'
 147
 148 SVN_INVALID_REVNUM = -1
 149
 150 COMMIT_THRESHOLD = 5 * 60       # flush a commit if a 5 minute gap occurs
 151
 152 # Things that can happen to a file.
 153 OP_NOOP   = '-'
 154 OP_ADD    = 'A'
 155 OP_DELETE = 'D'
 156 OP_CHANGE = 'C'
 157
 158 # A deltatext either does or doesn't represent some change.
 159 DELTATEXT_NONEMPTY = 'N'
 160 DELTATEXT_EMPTY    = 'E'
 161
 162 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
 163
 164 # Officially, CVS symbolic names must use a fairly restricted set of
 165 # characters.  Unofficially, CVS 1.10 allows any character but [$,.:;@]
 166 # We don't care if some repositories out there use characters outside the
 167 # official set, as long as their tags start with a letter.
 168 # Since the unofficial set also includes [/\] we need to translate those
 169 # into ones that don't conflict with Subversion limitations.
 170 symbolic_name_re = re.compile('^[a-zA-Z].*$')
 171 symbolic_name_transtbl = string.maketrans('/\\',',;')
 172
 173 # A wrapper for anydbm that uses the marshal module to store items as
 174 # strings.
 175 class Database:
 176   def __init__(self, filename, mode):
 177     self.db = anydbm.open(filename, mode)
 178
 179   def has_key(self, key):
 180     return self.db.has_key(key)
 181
 182   def __getitem__(self, key):
 183     return marshal.loads(self.db[key])
 184
 185   def __setitem__(self, key, value):
 186     self.db[key] = marshal.dumps(value)
 187
 188   def __delitem__(self, key):
 189     del self.db[key]
 190
 191 class CollectData(rcsparse.Sink):
 192   def __init__(self, cvsroot, log_fname_base, default_branches_db):
 193     self.cvsroot = cvsroot
 194     self.revs = open(log_fname_base + REVS_SUFFIX, 'w')
 195     self.resync = open(log_fname_base + RESYNC_SUFFIX, 'w')
 196     self.default_branches_db = default_branches_db
 197     self.metadata_db = Database(METADATA_DB, 'n')
 198     self.fatal_errors = []
 199
 200     # Branch and tag label types.
 201     self.BRANCH_LABEL = 0
 202     self.VENDOR_BRANCH_LABEL = 1
 203     self.TAG_LABEL = 2
 204     # A label type to string conversion list
 205     self.LABEL_TYPES = [ 'branch', 'vendor branch', 'tag' ]
 206     # A dict mapping label names to types
 207     self.label_type = { }
 208
 209     # See set_fname() for initializations of other variables.
 210
 211   def set_fname(self, fname):
 212     "Prepare to receive data for a new file."
 213     self.fname = fname
 214
 215     # revision -> [timestamp, author, operation, old-timestamp]
 216     self.rev_data = { }
 217     self.prev = { }
 218
 219     # Hash mapping branch numbers, like '1.7.2', to branch names,
 220     # like 'Release_1_0_dev'.
 221     self.branch_names = { }
 222
 223     # Hash mapping revision numbers, like '1.7', to lists of names
 224     # indicating which branches sprout from that revision, like
 225     # ['Release_1_0_dev', 'experimental_driver', ...].
 226     self.branchlist = { }
 227
 228     # Like self.branchlist, but the values are lists of tag names that
 229     # apply to the key revision.
 230     self.taglist = { }
 231
 232     # This is always a number -- rcsparse calls this the "principal
 233     # branch", but CVS and RCS refer to it as the "default branch",
 234     # so that's what we call it, even though the rcsparse API setter
 235     # method is still 'set_principal_branch'.
 236     self.default_branch = None
 237
 238     # If the RCS file doesn't have a default branch anymore, but does
 239     # have vendor revisions, then we make an educated guess that those
 240     # revisions *were* the head of the default branch up until the
 241     # commit of 1.2, at which point the file's default branch became
 242     # trunk.  This records the date at which 1.2 was committed.
 243     self.first_non_vendor_revision_date = None
 244
 245   def set_principal_branch(self, branch):
 246     self.default_branch = branch
 247
 248   def set_branch_name(self, branch_number, name):
 249     """Record that BRANCH_NUMBER is the branch number for branch NAME,
 250     and that NAME sprouts from BRANCH_NUMBER .
 251     BRANCH_NUMBER is an RCS branch number with an odd number of components,
 252     for example '1.7.2' (never '1.7.0.2')."""
 253     if not self.branch_names.has_key(branch_number):
 254       self.branch_names[branch_number] = name
 255       # The branchlist is keyed on the revision number from which the
 256       # branch sprouts, so strip off the odd final component.
 257       sprout_rev = branch_number[:branch_number.rfind(".")]
 258       if not self.branchlist.has_key(sprout_rev):
 259         self.branchlist[sprout_rev] = []
 260       self.branchlist[sprout_rev].append(name)
 261     else:
 262       sys.stderr.write("%s: in '%s':\n"
 263                        "   branch '%s' already has name '%s',\n"
 264                        "   cannot also have name '%s', ignoring the latter\n"
 265                        % (warning_prefix, self.fname, branch_number,
 266                           self.branch_names[branch_number], name))
 267
 268   def rev_to_branch_name(self, revision):
 269     """Return the name of the branch on which REVISION lies.
 270     REVISION is a non-branch revision number with an even number of,
 271     components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
 272     For the convenience of callers, REVISION can also be a trunk
 273     revision such as '1.2', in which case just return None."""
 274     if trunk_rev.match(revision):
 275       return None
 276     return self.branch_names.get(revision[:revision.rindex(".")])
 277
 278   def add_cvs_branch(self, revision, branch_name):
 279     """Record the root revision and branch revision for BRANCH_NAME,
 280     based on REVISION.  REVISION is a CVS branch number having an even
 281     number of components where the second-to-last is '0'.  For
 282     example, if it's '1.7.0.2', then record that BRANCH_NAME sprouts
 283     from 1.7 and has branch number 1.7.2."""
 284     last_dot = revision.rfind(".")
 285     branch_rev = revision[:last_dot]
 286     last2_dot = branch_rev.rfind(".")
 287     branch_rev = branch_rev[:last2_dot] + revision[last_dot:]
 288     self.set_branch_name(branch_rev, branch_name)
 289
 290   def get_tags(self, revision):
 291     """Return a list of all tag names attached to REVISION.
 292     REVISION is a regular revision number like '1.7', and the result
 293     never includes branch names, only plain tags."""
 294     return self.taglist.get(revision, [])
 295
 296   def get_branches(self, revision):
 297     """Return a list of all branch names that sprout from REVISION.
 298     REVISION is a regular revision number like '1.7'."""
 299     return self.branchlist.get(revision, [])
 300
 301   def define_tag(self, name, revision):
 302     """Record a bidirectional mapping between symbolic NAME and REVISION.
 303     REVISION is an unprocessed revision number from the RCS file's
 304     header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
 305     This function will determine what kind of symbolic name it is by
 306     inspection, and record it in the right places."""
 307     if not symbolic_name_re.match(name):
 308       sys.stderr.write("%s: in '%s':\n"
 309                        "   '%s' is not a valid tag or branch name, ignoring\n"
 310                        % (warning_prefix, self.fname, name))
 311     elif branch_tag.match(revision):
 312       label_type = self.BRANCH_LABEL
 313       self.add_cvs_branch(revision, name)
 314     elif vendor_tag.match(revision):
 315       label_type = self.VENDOR_BRANCH_LABEL
 316       self.set_branch_name(revision, name)
 317     else:
 318       label_type = self.TAG_LABEL
 319       if not self.taglist.has_key(revision):
 320         self.taglist[revision] = []
 321       self.taglist[revision].append(name)
 322
 323     try:
 324       # if label_types are different and at least one is a tag (We
 325       # don't want to error on branch/vendor branch mismatches)
 326       if (self.label_type[name] != label_type
 327           and(self.label_type[name] == self.TAG_LABEL
 328               or label_type == self.TAG_LABEL)):
 329         err = ("%s: in '%s' (BRANCH/TAG MISMATCH):\n   '%s' "
 330                " is defined as %s here, but as a %s elsewhere"
 331                % (error_prefix, self.fname, name,
 332                   self.LABEL_TYPES[label_type],
 333                   self.LABEL_TYPES[self.label_type[name]]))
 334         sys.stderr.write(err)
 335         self.fatal_errors.append(err)
 336     except KeyError:
 337       self.label_type[name] = label_type
 338
 339   def define_revision(self, revision, timestamp, author, state,
 340                       branches, next):
 341     ### what else?
 342     if state == 'dead':
 343       op = OP_DELETE
 344     else:
 345       op = OP_CHANGE
 346
 347     # store the rev_data as a list in case we have to jigger the timestamp
 348     self.rev_data[revision] = [int(timestamp), author, op, None]
 349
 350     # record the previous revision for sanity checking later
 351     if trunk_rev.match(revision):
 352       self.prev[revision] = next
 353     elif next:
 354       self.prev[next] = revision
 355     for b in branches:
 356       self.prev[b] = revision
 357
 358     # Ratchet up the highest vendor head revision, if necessary.
 359     if self.default_branch:
 360       if revision.find(self.default_branch) == 0:
 361         # This revision is on the default branch, so record that it is
 362         # the new highest vendor head revision.
 363         rel_name = relative_name(self.cvsroot, self.fname)[:-2]
 364         self.default_branches_db[rel_name] = revision
 365     else:
 366       # No default branch, so make an educated guess.
 367       if revision == '1.2':
 368         # This is probably the time when the file stopped having a
 369         # default branch, so make a note of it.
 370         self.first_non_vendor_revision_date = timestamp
 371       else:
 372         m = vendor_revision.match(revision)
 373         if m and ((not self.first_non_vendor_revision_date)
 374                   or (timestamp < self.first_non_vendor_revision_date)):
 375           # We're looking at a vendor revision, and it wasn't
 376           # committed after this file lost its default branch, so bump
 377           # the maximum trunk vendor revision in the permanent record.
 378           rel_name = relative_name(self.cvsroot, self.fname)[:-2]
 379           self.default_branches_db[rel_name] = revision
 380
 381     # Check for unlabeled branches, record them.  We tried to collect
 382     # all branch names when we parsed the symbolic name header
 383     # earlier, of course, but that didn't catch unlabeled branches.
 384     # If a branch is unlabeled, this is our first encounter with it,
 385     # so we have to record its data now.
 386     if not trunk_rev.match(revision):
 387       branch_number = revision[:revision.rindex(".")]
 388       branch_name = "unlabeled-" + branch_number
 389       if not self.branch_names.has_key(branch_number):
 390         self.set_branch_name(branch_number, branch_name)
 391
 392   def tree_completed(self):
 393     "The revision tree has been parsed. Analyze it for consistency."
 394
 395     # Our algorithm depends upon the timestamps on the revisions occuring
 396     # monotonically over time. That is, we want to see rev 1.34 occur in
 397     # time before rev 1.35. If we inserted 1.35 *first* (due to the time-
 398     # sorting), and then tried to insert 1.34, we'd be screwed.
 399
 400     # to perform the analysis, we'll simply visit all of the 'previous'
 401     # links that we have recorded and validate that the timestamp on the
 402     # previous revision is before the specified revision
 403
 404     # if we have to resync some nodes, then we restart the scan. just keep
 405     # looping as long as we need to restart.
 406     while 1:
 407       for current, prev in self.prev.items():
 408         if not prev:
 409           # no previous revision exists (i.e. the initial revision)
 410           continue
 411         t_c = self.rev_data[current][0]
 412         t_p = self.rev_data[prev][0]
 413         if t_p >= t_c:
 414           # the previous revision occurred later than the current revision.
 415           # shove the previous revision back in time (and any before it that
 416           # may need to shift).
 417           while t_p >= t_c:
 418             self.rev_data[prev][0] = t_c - 1    # new timestamp
 419             self.rev_data[prev][3] = t_p        # old timestamp
 420
 421             print "RESYNC: '%s' (%s) : old time='%s' new time='%s'" \
 422                   % (relative_name(self.cvsroot, self.fname),
 423                      prev, time.ctime(t_p), time.ctime(t_c - 1))
 424
 425             current = prev
 426             prev = self.prev[current]
 427             if not prev:
 428               break
 429             t_c = t_c - 1               # self.rev_data[current][0]
 430             t_p = self.rev_data[prev][0]
 431
 432           # break from the for-loop
 433           break
 434       else:
 435         # finished the for-loop (no resyncing was performed)
 436         return
 437
 438   def set_revision_info(self, revision, log, text):
 439     timestamp, author, op, old_ts = self.rev_data[revision]
 440     digest = sha.new(log + '\0' + author).hexdigest()
 441     if old_ts:
 442       # the timestamp on this revision was changed. log it for later
 443       # resynchronization of other files's revisions that occurred
 444       # for this time and log message.
 445       self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))
 446
 447     # "...Give back one kadam to honor the Hebrew God whose Ark this is."
 448     #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
 449     #
 450     # If revision 1.1 appears to have been created via 'cvs add'
 451     # instead of 'cvs import', then this file probably never had a
 452     # default branch, so retroactively remove its record in the
 453     # default branches db.  The test is that the log message CVS uses
 454     # for 1.1 in imports is "Initial revision\n" with no period.
 455     if revision == '1.1' and log != 'Initial revision\n':
 456       rel_name = relative_name(self.cvsroot, self.fname)[:-2]
 457       if self.default_branches_db.has_key(rel_name):
 458         del self.default_branches_db[rel_name]
 459
 460     if text:
 461       deltatext_code = DELTATEXT_NONEMPTY
 462     else:
 463       deltatext_code = DELTATEXT_EMPTY
 464
 465     write_revs_line(self.revs, timestamp, digest, op, revision,
 466                     deltatext_code, self.fname,
 467                     self.rev_to_branch_name(revision),
 468                     self.get_tags(revision),
 469                     self.get_branches(revision))
 470
 471     if not self.metadata_db.has_key(digest):
 472       self.metadata_db[digest] = (author, log)
 473
 474 def run_command(command):
 475   if os.system(command):
 476     sys.exit('Command failed: "%s"' % command)
 477
 478 def make_path(ctx, path, branch_name = None, tag_name = None):
 479   """Return the trunk path, branch path, or tag path for PATH.
 480   CTX holds the name of the branches or tags directory, which is
 481   prepended to PATH when constructing a branch or tag path.
 482
 483   If PATH is empty or None, return the root trunk|branch|tag path.
 484
 485   It is an error to pass both a BRANCH_NAME and a TAG_NAME."""
 486
 487   # For a while, we treated each top-level subdir of the CVS
 488   # repository as a "project root" and interpolated the appropriate
 489   # genealogy (trunk|tag|branch) in according to the official
 490   # recommended layout.  For example, the path '/foo/bar/baz.c' on
 491   # branch 'Rel2' would become
 492   #
 493   #   /foo/branches/Rel2/bar/baz.c
 494   #
 495   # and on trunk it would become
 496   #
 497   #   /foo/trunk/bar/baz.c
 498   #
 499   # However, we went back to the older and simpler method of just
 500   # prepending the genealogy to the front, instead of interpolating.
 501   # So now we produce:
 502   #
 503   #   /branches/Rel2/foo/bar/baz.c
 504   #   /trunk/foo/bar/baz.c
 505   #
 506   # Why?  Well, Jack Repenning pointed out that this way is much
 507   # friendlier to "anonymously rooted subtrees" (that's a tree where
 508   # the name of the top level dir doesn't matter, the point is that if
 509   # you cd into it and, say, run 'make', something good will happen).
 510   # By interpolating, we made it impossible to point cvs2svn at some
 511   # subdir in the CVS repository and convert it as a project, because
 512   # we'd treat every subdir underneath it as an independent project
 513   # root, which is probably not what the user wanted.
 514   #
 515   # Also, see Blair Zajac's post
 516   #
 517   #    http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=38965
 518   #
 519   # and the surrounding thread, for why what people really want is a
 520   # way of specifying an in-repository prefix path, not interpolation.
 521
 522   # Check caller sanity.
 523   if branch_name and tag_name:
 524     sys.stderr.write("%s: make_path() miscalled: both branch and tag given.\n"
 525                      % error_prefix)
 526     sys.exit(1)
 527
 528   if branch_name:
 529     branch_name = branch_name.translate(symbolic_name_transtbl)
 530     if path:
 531       return ctx.branches_base + '/' + branch_name + '/' + path
 532     else:
 533       return ctx.branches_base + '/' + branch_name
 534   elif tag_name:
 535     tag_name = tag_name.translate(symbolic_name_transtbl)
 536     if path:
 537       return ctx.tags_base + '/' + tag_name + '/' + path
 538     else:
 539       return ctx.tags_base + '/' + tag_name
 540   else:
 541     if path:
 542       return ctx.trunk_base + '/' + path
 543     else:
 544       return ctx.trunk_base
 545
 546
 547 def relative_name(cvsroot, fname):
 548   l = len(cvsroot)
 549   if fname[:l] == cvsroot:
 550     if fname[l] == os.sep:
 551       return string.replace(fname[l+1:], os.sep, '/')
 552     return string.replace(fname[l:], os.sep, '/')
 553   sys.stderr.write("%s: relative_path('%s', '%s'): fname is not a sub-path of"
 554                    " cvsroot\n" % (error_prefix, cvsroot, fname))
 555   sys.exit(1)
 556
 557
 558 def visit_file(arg, dirname, files):
 559   cd, p, stats = arg
 560   for fname in files:
 561     if fname[-2:] != ',v':
 562       continue
 563     pathname = os.path.join(dirname, fname)
 564     if dirname[-6:] == ATTIC:
 565       # drop the 'Attic' portion from the pathname
 566       ### we should record this so we can easily insert it back in
 567       cd.set_fname(os.path.join(dirname[:-6], fname))
 568     else:
 569       cd.set_fname(pathname)
 570     print pathname
 571     try:
 572       p.parse(open(pathname, 'rb'), cd)
 573       stats[0] = stats[0] + 1
 574     except (rcsparse.common.RCSParseError, ValueError, RuntimeError):
 575       err = "%s: '%s' is not a valid ,v file" \
 576             % (error_prefix, pathname)
 577       print err
 578       cd.fatal_errors.append(err)
 579     except:
 580       print "Exception occurred while parsing %s" % pathname
 581       raise
 582
 583
 584 # Return a string that has not been returned by gen_key() before.
 585 gen_key_base = 0L
 586 def gen_key():
 587   global gen_key_base
 588   key = '%x' % gen_key_base
 589   gen_key_base = gen_key_base + 1
 590   return key
 591
 592
 593 class Change:
 594   """Class for recording what actually happened when a change is made,
 595   because not all of the result is guessable by the caller.
 596   See RepositoryMirror.change_path() for more.
 597
 598   The fields are
 599
 600     op:
 601        OP_ADD path was added, OP_CHANGE if changed, or OP_NOOP if no
 602        action.
 603
 604     closed_tags:
 605        List of tags that this path can no longer be the source of,
 606        that is, tags which could be rooted in the path before the
 607        change, but not after.
 608
 609     closed_branches:
 610        Like closed_tags, but for branches.
 611
 612     deleted_entries:
 613        The list of entries deleted from the destination after
 614        copying a directory, or None.
 615
 616     copyfrom_rev:
 617        The actual revision from which the path was copied, which
 618        may be one less than the requested revision when the path
 619        was deleted in the requested revision, or None."""
 620   def __init__(self, op, closed_tags, closed_branches,
 621                deleted_entries=None, copyfrom_rev=None):
 622     self.op = op
 623     self.closed_tags = closed_tags
 624     self.closed_branches = closed_branches
 625     self.deleted_entries = deleted_entries
 626     self.copyfrom_rev = copyfrom_rev
 627
 628
 629 class RepositoryMirror:
 630   def __init__(self):
 631     # This corresponds to the 'revisions' table in a Subversion fs.
 632     self.revs_db_file = SVN_REVISIONS_DB
 633     self.revs_db = Database(self.revs_db_file, 'n')
 634
 635     # This corresponds to the 'nodes' table in a Subversion fs.  (We
 636     # don't need a 'representations' or 'strings' table because we
 637     # only track metadata, not file contents.)
 638     self.nodes_db_file = NODES_DB
 639     self.nodes_db = Database(self.nodes_db_file, 'n')
 640
 641     # This tracks which symbolic names the current "head" of a given
 642     # filepath could be the origin node for.  When the next commit on
 643     # that path comes along, we can tell which symbolic names
 644     # originated in the previous version, and signal back to the
 645     # caller that the file can no longer be the origin for those names.
 646     #
 647     # The values are tuples, (tags, branches), where each value is a
 648     # list.
 649     self.symroots_db_file = SYMBOLIC_NAME_ROOTS_DB
 650     self.symroots_db = Database(self.symroots_db_file, 'n')
 651
 652     # When copying a directory (say, to create part of a branch), we
 653     # pass change_path() a list of expected entries, so it can remove
 654     # any that are in the source but don't belong on the branch.
 655     # However, because creating a given region of a branch can involve
 656     # copying from several sources, we don't want later copy
 657     # operations to delete entries that were legitimately created by
 658     # earlier copy ops.  So after a copy, the directory records
 659     # legitimate entries under this key, in a dictionary (the keys are
 660     # entry names, the values can be ignored).
 661     self.approved_entries = "/approved-entries"
 662
 663     # Set to a true value on a directory that's mutable in the
 664     # revision currently being constructed. (Yes, this is exactly
 665     # analogous to the Subversion filesystem code's concept of
 666     # mutability.)
 667     # Is also overloaded with a second piece of information.
 668     # If the value of the flag is 2, then in addition to the node
 669     # being mutable, the node and all subnodes were created by a copy
 670     # operation in the current revision. In this and only this
 671     # circumstance, it is valid for pruning to occur.
 672     self.mutable_flag = "/mutable"
 673     # This could represent a new mutable directory or file.
 674     self.empty_mutable_thang = { self.mutable_flag : 1 }
 675
 676     # Init a root directory with no entries at revision 0.
 677     self.youngest = 0
 678     youngest_key = gen_key()
 679     self.revs_db[str(self.youngest)] = youngest_key
 680     self.nodes_db[youngest_key] = {}
 681
 682   def new_revision(self):
 683     """Stabilize the current revision, then start the next one.
 684     (Increments youngest.)"""
 685     self.stabilize_youngest()
 686     self.revs_db[str(self.youngest + 1)] \
 687                                       = self.revs_db[str(self.youngest)]
 688     self.youngest = self.youngest + 1
 689
 690   def _stabilize_directory(self, key):
 691     """Close the directory whose node key is KEY."""
 692     dir = self.nodes_db[key]
 693     if dir.has_key(self.mutable_flag):
 694       del dir[self.mutable_flag]
 695       if dir.has_key(self.approved_entries):
 696         del dir[self.approved_entries]
 697       for entry_key in dir.keys():
 698         if not entry_key[0] == '/':
 699           self._stabilize_directory(dir[entry_key])
 700       self.nodes_db[key] = dir
 701
 702   def stabilize_youngest(self):
 703     """Stabilize the current revision by removing mutable flags."""
 704     root_key = self.revs_db[str(self.youngest)]
 705     self._stabilize_directory(root_key)
 706
 707   def probe_path(self, path, revision=-1, debugging=None):
 708     """If PATH exists in REVISION of the svn repository mirror,
 709     return its leaf value, else return None.
 710     If DEBUGGING is true, then print trace output to stdout.
 711     REVISION defaults to youngest, and PATH must not start with '/'."""
 712     components = string.split(path, '/')
 713     if revision == -1:
 714       revision = self.youngest
 715
 716     if debugging:
 717       print "PROBING path: '%s' in %d" % (path, revision)
 718
 719     parent_key = self.revs_db[str(revision)]
 720     parent = self.nodes_db[parent_key]
 721     previous_component = "/"
 722
 723     i = 1
 724     for component in components:
 725
 726       if debugging:
 727         print "  " * i,
 728         print "'%s' key: %s, val:" % (previous_component, parent_key), parent
 729
 730       if not parent.has_key(component):
 731         if debugging:
 732           print "  PROBE ABANDONED: '%s' does not contain '%s'" \
 733                 % (previous_component, component)
 734         return None
 735
 736       this_entry_key = parent[component]
 737       this_entry_val = self.nodes_db[this_entry_key]
 738       parent_key = this_entry_key
 739       parent = this_entry_val
 740       previous_component = component
 741       i = i + 1
 742
 743     if debugging:
 744       print "  " * i,
 745       print "parent_key: %s, val:" % parent_key, parent
 746
 747     # It's not actually a parent at this point, it's the leaf node.
 748     return parent
 749
 750   def change_path(self, path, tags, branches,
 751                   intermediate_dir_func=None,
 752                   copyfrom_path=None, copyfrom_rev=None,
 753                   expected_entries=None, only_if_already_exists=None):
 754     """Record a change to PATH.  PATH may not have a leading slash.
 755     Return a Change instance representing the result of the
 756     change.
 757
 758     TAGS are any tags that sprout from this revision of PATH, BRANCHES
 759     are any branches that sprout from this revision of PATH.
 760
 761     If INTERMEDIATE_DIR_FUNC is not None, then invoke it once on
 762     each full path to each missing intermediate directory in PATH, in
 763     order from shortest to longest.
 764
 765     If COPYFROM_REV and COPYFROM_PATH are not None, then they are a
 766     revision and path to record as the copyfrom sources of this node.
 767     Since this implies an add (OP_ADD), it would be reasonable to
 768     error and exit if the copyfrom args are present but the node also
 769     already exists.  Reasonable -- but not what we do :-).  The most
 770     useful behavior for callers is instead to report that nothing was
 771     done, by returning OP_NOOP for Change.op, so that's what we do.
 772
 773     It is an error for only one copyfrom argument to be present.
 774
 775     If EXPECTED_ENTRIES is not None, then it holds entries expected
 776     to be in the dst after the copy.  Any entries in the new dst but
 777     not in EXPECTED_ENTRIES are removed (ignoring keys beginning with
 778     '/'), and the removed entries returned in Change.deleted_entries,
 779     which are otherwise None.
 780
 781     No action is taken for keys in EXPECTED_ENTRIES but not in the
 782     dst; it is assumed that the caller will compensate for these by
 783     calling change_path again with other arguments.
 784
 785     If ONLY_IF_ALREADY_EXISTS is set, then do a no-op, rather than an add,
 786     if the path does not exist. This is to allow pruning using EXPECTED_ENTRIES
 787     without risking erroneously adding a path."""
 788
 789     # Check caller sanity.
 790     if ((copyfrom_rev and not copyfrom_path) or
 791         (copyfrom_path and not copyfrom_rev)):
 792       sys.stderr.write("%s: change_path() called with one copyfrom "
 793                        "argument but not the other.\n" % error_prefix)
 794       sys.exit(1)
 795
 796     components = string.split(path, '/')
 797     path_so_far = None
 798
 799     deletions = []
 800     in_pruneable_subtree = None
 801
 802     parent_key = self.revs_db[str(self.youngest)]
 803     parent = self.nodes_db[parent_key]
 804     if not parent.has_key(self.mutable_flag):
 805       parent_key = gen_key()
 806       parent[self.mutable_flag] = 1
 807       self.nodes_db[parent_key] = parent
 808       self.revs_db[str(self.youngest)] = parent_key
 809
 810     for component in components[:-1]:
 811       # parent is always mutable at the top of the loop
 812
 813       if path_so_far:
 814         path_so_far = path_so_far + '/' + component
 815       else:
 816         path_so_far = component
 817
 818       # Ensure that the parent has an entry for this component.
 819       if not parent.has_key(component):
 820         if only_if_already_exists:
 821           return Change(OP_NOOP, [], [], deletions)
 822         # else
 823         new_child_key = gen_key()
 824         parent[component] = new_child_key
 825         self.nodes_db[new_child_key] = self.empty_mutable_thang
 826         self.nodes_db[parent_key] = parent
 827         if intermediate_dir_func:
 828           intermediate_dir_func(path_so_far)
 829
 830       # One way or another, parent dir now has an entry for component,
 831       # so grab it, see if it's mutable, and DTRT if it's not.  (Note
 832       # it's important to reread the entry value from the db, even
 833       # though we might have just written it -- if we tweak existing
 834       # data structures, we could modify self.empty_mutable_thang,
 835       # which must not happen.)
 836       this_entry_key = parent[component]
 837       this_entry_val = self.nodes_db[this_entry_key]
 838       mutable = this_entry_val.get(self.mutable_flag)
 839       if not mutable:
 840         this_entry_val[self.mutable_flag] = 1
 841         this_entry_key = gen_key()
 842         parent[component] = this_entry_key
 843         self.nodes_db[this_entry_key] = this_entry_val
 844         self.nodes_db[parent_key] = parent
 845       elif mutable == 2:
 846         in_pruneable_subtree = 1
 847
 848       parent_key = this_entry_key
 849       parent = this_entry_val
 850
 851     # Now change the last node, the versioned file.  Just like at the
 852     # top of the above loop, parent is already mutable.
 853     op = OP_ADD
 854     if self.symroots_db.has_key(path):
 855       old_names = self.symroots_db[path]
 856     else:
 857       old_names = [], []
 858     last_component = components[-1]
 859     new_val = { }
 860     if parent.has_key(last_component):
 861       # The contract for copying over existing nodes is to do nothing
 862       # and return:
 863       if copyfrom_path:
 864         return Change(OP_NOOP, old_names[0], old_names[1], deletions)
 865       # else
 866       op = OP_CHANGE
 867       new_val = self.nodes_db[parent[last_component]]
 868     elif only_if_already_exists:
 869       return Change(OP_NOOP, [], [], deletions)
 870
 871     leaf_key = gen_key()
 872     if copyfrom_path:
 873       new_val = self.probe_path(copyfrom_path, copyfrom_rev)
 874       if new_val is None:
 875         # Sometimes a branch is rooted in a revision that RCS has
 876         # marked as 'dead'. There is no reason to assume that the
 877         # current path shares any history with any older live parent
 878         # of the dead revision, so we do nothing and return.
 879         return Change(OP_NOOP, [], [], deletions)
 880       # Special value of mutable flag indicates that this subtree was created
 881       # by copying in this revision. Iff this is true, then it is valid to
 882       # use expected_entries to prune items.
 883       new_val[self.mutable_flag] = 2
 884       in_pruneable_subtree = 1
 885     else:
 886       new_val[self.mutable_flag] = 1
 887     if expected_entries is not None:
 888       # If it is not None, then even if it is an empty list/tuple,
 889       # we need to approve this item in its parent's approved entries list.
 890       approved_entries = parent.get(self.approved_entries) or {}
 891       approved_entries[last_component] = 1
 892       parent[self.approved_entries] = approved_entries
 893     if expected_entries:
 894       approved_entries = new_val.get(self.approved_entries) or { }
 895       new_approved_entries = { }
 896       for ent in new_val.keys():
 897         if (ent[0] != '/'):
 898           if (not expected_entries.has_key(ent)
 899               and not approved_entries.has_key(ent)):
 900             if in_pruneable_subtree:
 901               del new_val[ent]
 902               deletions.append(ent)
 903           else:
 904             new_approved_entries[ent] = 1
 905       new_val[self.approved_entries] = new_approved_entries
 906     parent[last_component] = leaf_key
 907     self.nodes_db[parent_key] = parent
 908     self.symroots_db[path] = (tags, branches)
 909     self.nodes_db[leaf_key] = new_val
 910
 911     return Change(op, old_names[0], old_names[1], deletions, copyfrom_rev)
 912
 913   def delete_path(self, path, tags, branches, prune=None):
 914     """Delete PATH from the tree.  PATH may not have a leading slash.
 915
 916     Return a tuple (path_deleted, closed_tags, closed_branches), where
 917     path_deleted is the path actually deleted or None if PATH did not
 918     exist, and closed_tags and closed_branches are lists of symbolic
 919     names closed off by this deletion -- that is, tags or branches
 920     which could be rooted in the previous revision of PATH, but not in
 921     this revision, because this rev changes PATH.  If path_deleted is
 922     None, then closed_tags and closed_branches will both be empty.
 923
 924     TAGS are any tags that sprout from this revision of PATH, BRANCHES
 925     are any branches that sprout from this revision of PATH.  (I can't
 926     imagine that there are any of either, what to do if there are?)
 927
 928     If PRUNE is not None, then delete the highest possible directory,
 929     which means the returned path may differ from PATH.  In other
 930     words, if PATH was the last entry in its parent, then delete
 931     PATH's parent, unless it too is the last entry in *its* parent, in
 932     which case delete that parent, and so on up the chain, until a
 933     directory is encountered that has an entry which is not a member
 934     of the parent stack of the original target.
 935
 936     NOTE: This function does *not* allow you delete top-level entries
 937     (like /trunk, /branches, /tags), not does it prune upwards beyond
 938     those entries.
 939
 940     PRUNE is like the -P option to 'cvs checkout'."""
 941
 942     components = string.split(path, '/')
 943     path_so_far = None
 944
 945     parent_key = self.revs_db[str(self.youngest)]
 946     parent = self.nodes_db[parent_key]
 947
 948     # As we walk down to find the dest, we remember each parent
 949     # directory's name and db key, in reverse order: push each new key
 950     # onto the front of the list, so that by the time we reach the
 951     # destination node, the zeroth item in the list is the parent of
 952     # that destination.
 953     #
 954     # Then if we actually do the deletion, we walk the list from left
 955     # to right, replacing as appropriate.
 956     #
 957     # The root directory has name None.
 958     parent_chain = [ ]
 959     parent_chain.insert(0, (None, parent_key))
 960
 961     def is_prunable(dir):
 962       """Return true if DIR, a dictionary representing a directory,
 963       has just zero or one non-special entry, else return false.
 964       (In a pure world, we'd just ask len(DIR) > 1; it's only
 965       because the directory might have mutable flags and other special
 966       entries that we need this function at all.)"""
 967       num_items = len(dir)
 968       if num_items > 3:
 969         return None
 970       if num_items == 3 or num_items == 2:
 971         real_entries = 0
 972         for key in dir.keys():
 973           if not key[0] == '/': real_entries = real_entries + 1
 974         if real_entries > 1:
 975           return None
 976         else:
 977           return 1
 978       else:
 979         return 1
 980
 981     # We never prune our top-level directories (/trunk, /tags, /branches)
 982     if len(components) < 2:
 983       return None, [], []
 984
 985     for component in components[:-1]:
 986       if path_so_far:
 987         path_so_far = path_so_far + '/' + component
 988       else:
 989         path_so_far = component
 990
 991       # If we can't reach the dest, then we don't need to do anything.
 992       if not parent.has_key(component):
 993         return None, [], []
 994
 995       # Otherwise continue downward, dropping breadcrumbs.
 996       this_entry_key = parent[component]
 997       this_entry_val = self.nodes_db[this_entry_key]
 998       parent_key = this_entry_key
 999       parent = this_entry_val
1000       parent_chain.insert(0, (component, parent_key))
1001
1002     # If the target is not present in its parent, then we're done.
1003     last_component = components[-1]
1004     old_names = [], []
1005     if not parent.has_key(last_component):
1006       return None, [], []
1007     elif self.symroots_db.has_key(path):
1008       old_names = self.symroots_db[path]
1009       del self.symroots_db[path]
1010
1011     # The target is present, so remove it and bubble up, making a new
1012     # mutable path and/or pruning as necessary.
1013     pruned_count = 0
1014     prev_entry_name = last_component
1015     new_key = None
1016     for parent_item in parent_chain:
1017       pkey = parent_item[1]
1018       pval = self.nodes_db[pkey]
1019
1020       # If we're pruning at all, and we're looking at a prunable thing
1021       # (and that thing isn't one of our top-level directories --
1022       # trunk, tags, branches) ...
1023       if prune and (new_key is None) and is_prunable(pval) \
1024          and parent_item != parent_chain[-2]:
1025         # ... then up our count of pruned items, and do nothing more.
1026         # All the action takes place when we hit a non-prunable
1027         # parent.
1028         pruned_count = pruned_count + 1
1029       else:
1030         # Else, we've hit a non-prunable, or aren't pruning, so bubble
1031         # up the new gospel.
1032         pval[self.mutable_flag] = 1
1033         if new_key is None:
1034           del pval[prev_entry_name]
1035         else:
1036           pval[prev_entry_name] = new_key
1037         new_key = gen_key()
1038
1039       prev_entry_name = parent_item[0]
1040       if new_key:
1041         self.nodes_db[new_key] = pval
1042
1043     if new_key is None:
1044       new_key = gen_key()
1045       self.nodes_db[new_key] = self.empty_mutable_thang
1046
1047     # Install the new root entry.
1048     self.revs_db[str(self.youngest)] = new_key
1049
1050     # Sanity check -- this should be a "can't happen".
1051     if pruned_count > len(components):
1052       sys.stderr.write("%s: deleting '%s' tried to prune %d components.\n"
1053                        % (error_prefix, path, pruned_count))
1054       sys.exit(1)
1055
1056     if pruned_count:
1057       if pruned_count == len(components):
1058         # We never prune away the root directory, so back up one component.
1059         pruned_count = pruned_count - 1
1060       retpath = string.join(components[:0 - pruned_count], '/')
1061     else:
1062       retpath = path
1063
1064     return retpath, old_names[0], old_names[1]
1065
1066     ### We've no place to put tags + branches.  Suspect we just
1067     ### shouldn't be taking them as arguments, which the doc string
1068     ### implies already.  Ponder.
1069
1070   def close(self):
1071     # Just stabilize the last revision.  This may or may not affect
1072     # anything, but if we end up using the mirror for anything after
1073     # this, it's nice to know the '/mutable' entries are gone.
1074     self.stabilize_youngest()
1075
1076 if sys.platform == "win32":
1077   def escape_shell_arg(str):
1078     return '"' + string.replace(str, '"', '"^""') + '"'
1079 else:
1080   def escape_shell_arg(str):
1081     return "'" + string.replace(str, "'", "'\\''") + "'"
1082
1083 class Dumper:
1084   def __init__(self, ctx):
1085     'Open DUMPFILE_PATH, and initialize revision to REVISION.'
1086     self.dumpfile_path = ctx.dumpfile
1087     self.revision = 0
1088     self.repos_mirror = RepositoryMirror()
1089     self.svnadmin = ctx.svnadmin
1090     self.target = ctx.target
1091     self.dump_only = ctx.dump_only
1092     self.dumpfile = None
1093     self.path_encoding = ctx.encoding
1094     self.loader_pipe = None
1095
1096     # If all we're doing here is dumping, we can go ahead and
1097     # initialize our single dumpfile.  Else, if we're suppose to
1098     # create the repository, do so.
1099     if self.dump_only:
1100       self.init_dumpfile()
1101       self.write_dumpfile_header(self.dumpfile)
1102     else:
1103       if not ctx.existing_svnrepos:
1104         print "creating repos '%s'" % (self.target)
1105         run_command('%s create %s %s' % (self.svnadmin, ctx.bdb_txn_nosync
1106           and "--bdb-txn-nosync" or "", self.target))
1107       self.loader_pipe = os.popen('%s load -q %s' %
1108           (self.svnadmin, self.target), PIPE_WRITE_MODE)
1109       self.write_dumpfile_header(self.loader_pipe)
1110
1111
1112   def init_dumpfile(self):
1113     # Open the dumpfile for binary-mode write.
1114     self.dumpfile = open(self.dumpfile_path, 'wb')
1115
1116
1117   def write_dumpfile_header(self, fileobj):
1118     # Initialize the dumpfile with the standard headers:
1119     #
1120     # The CVS repository doesn't have a UUID, and the Subversion
1121     # repository will be created with one anyway.  So when we load
1122     # the dumpfile, we don't specify a UUID.
1123     fileobj.write('SVN-fs-dump-format-version: 2\n\n')
1124
1125   def flush_and_remove_dumpfile(self):
1126     if self.dumpfile is None:
1127       return
1128     self.dumpfile.close()
1129     print "piping revision %d into '%s' loader" % (self.revision, self.target)
1130     dumpfile = open(self.dumpfile_path, 'rb')
1131     while 1:
1132       data = dumpfile.read(1024*1024) # Choice of 1MB chunks is arbitrary
1133       if not len(data): break
1134       self.loader_pipe.write(data)
1135     dumpfile.close()
1136
1137     os.remove(self.dumpfile_path)
1138
1139   def start_revision(self, props):
1140     """Write the next revision, with properties, to the dumpfile.
1141     Return the newly started revision."""
1142
1143     # If this is not a --dump-only, we need to flush (load into the
1144     # repository) any dumpfile data we have already written and the
1145     # init a new dumpfile before starting this revision.
1146
1147     if not self.dump_only:
1148       if self.revision > 0:
1149         self.flush_and_remove_dumpfile()
1150       self.init_dumpfile()
1151
1152     self.revision = self.revision + 1
1153
1154     # A revision typically looks like this:
1155     #
1156     #   Revision-number: 1
1157     #   Prop-content-length: 129
1158     #   Content-length: 129
1159     #
1160     #   K 7
1161     #   svn:log
1162     #   V 27
1163     #   Log message for revision 1.
1164     #   K 10
1165     #   svn:author
1166     #   V 7
1167     #   jrandom
1168     #   K 8
1169     #   svn:date
1170     #   V 27
1171     #   2003-04-22T22:57:58.132837Z
1172     #   PROPS-END
1173     #
1174     # Notice that the length headers count everything -- not just the
1175     # length of the data but also the lengths of the lengths, including
1176     # the 'K ' or 'V ' prefixes.
1177     #
1178     # The reason there are both Prop-content-length and Content-length
1179     # is that the former includes just props, while the latter includes
1180     # everything.  That's the generic header form for any entity in a
1181     # dumpfile.  But since revisions only have props, the two lengths
1182     # are always the same for revisions.
1183
1184     # Calculate the total length of the props section.
1185     total_len = 10  # len('PROPS-END\n')
1186     for propname in props.keys():
1187       klen = len(propname)
1188       klen_len = len('K %d' % klen)
1189       vlen = len(props[propname])
1190       vlen_len = len('V %d' % vlen)
1191       # + 4 for the four newlines within a given property's section
1192       total_len = total_len + klen + klen_len + vlen + vlen_len + 4
1193
1194     # Print the revision header and props
1195     self.dumpfile.write('Revision-number: %d\n'
1196                         'Prop-content-length: %d\n'
1197                         'Content-length: %d\n'
1198                         '\n'
1199                         % (self.revision, total_len, total_len))
1200
1201     for propname in props.keys():
1202       self.dumpfile.write('K %d\n'
1203                           '%s\n'
1204                           'V %d\n'
1205                           '%s\n' % (len(propname),
1206                                     propname,
1207                                     len(props[propname]),
1208                                     props[propname]))
1209
1210     self.dumpfile.write('PROPS-END\n')
1211     self.dumpfile.write('\n')
1212
1213     self.repos_mirror.new_revision()
1214     return self.revision
1215
1216   def add_dir(self, path):
1217     self.dumpfile.write("Node-path: %s\n"
1218                         "Node-kind: dir\n"
1219                         "Node-action: add\n"
1220                         "Prop-content-length: 10\n"
1221                         "Content-length: 10\n"
1222                         "\n"
1223                         "PROPS-END\n"
1224                         "\n"
1225                         "\n" % self.utf8_path(path))
1226
1227   def utf8_path(self, path):
1228     """Return UTF-8 encoded 'path' based on ctx.path_encoding."""
1229     try:
1230       ### Log messages can be converted with 'replace' strategy.
1231       ### We can't afford that here.
1232       unicode_path = unicode(path, self.path_encoding, 'strict')
1233       return unicode_path.encode('utf-8')
1234
1235     except UnicodeError:
1236       print "Unable to convert a path '%s' to internal encoding." % path
1237       print "Consider rerunning with (for example) '--encoding=latin1'"
1238       sys.exit(1)
1239
1240
1241   def probe_path(self, path):
1242     """Return true if PATH exists in the youngest tree of the svn
1243     repository, else return None.  PATH does not start with '/'."""
1244     if self.repos_mirror.probe_path(path) is None:
1245       return None
1246     else:
1247       return 1
1248
1249   def copy_path(self, svn_src_path, svn_src_rev, svn_dst_path, entries=None):
1250     """If it wouldn't be redundant to do so, emit a copy of SVN_SRC_PATH at
1251     SVN_SRC_REV to SVN_DST_PATH.
1252
1253     Return 1 if the copy was done, None otherwise.
1254
1255     If ENTRIES is not None, it is a dictionary whose keys are the full
1256     set of entries the new copy is expected to have -- and therefore
1257     any entries in the new dst but not in ENTRIES will be removed.
1258     (Keys in ENTRIES beginning with '/' are ignored.)
1259
1260     No action is taken for keys in ENTRIES but not in the dst; it is
1261     assumed that the caller will compensate for these by calling
1262     copy_path again with other arguments."""
1263     change = self.repos_mirror.change_path(svn_dst_path,
1264                                            [], [],
1265                                            self.add_dir,
1266                                            svn_src_path, svn_src_rev,
1267                                            entries)
1268     if change.op == OP_ADD:
1269       if change.copyfrom_rev >= self.revision:
1270         sys.stderr.write("%s: invalid copyfrom revision %d used while\n"
1271                          "creating revision %d in dumpfile.\n"
1272                          % (error_prefix, change.copyfrom_rev, self.revision))
1273         sys.exit(1)
1274
1275       # We don't need to include "Node-kind:" for copies; the loader
1276       # ignores it anyway and just uses the source kind instead.
1277       self.dumpfile.write('Node-path: %s\n'
1278                           'Node-action: add\n'
1279                           'Node-copyfrom-rev: %d\n'
1280                           'Node-copyfrom-path: /%s\n'
1281                           '\n'
1282                           % (self.utf8_path(svn_dst_path),
1283                              change.copyfrom_rev,
1284                              self.utf8_path(svn_src_path)))
1285
1286       for ent in change.deleted_entries:
1287         self.dumpfile.write('Node-path: %s\n'
1288                             'Node-action: delete\n'
1289                             '\n' % (self.utf8_path(svn_dst_path + '/' + ent)))
1290       return 1
1291     return None
1292
1293   def prune_entries(self, path, expected):
1294     """Delete any entries in PATH that are not in list EXPECTED.
1295     PATH need not be a directory, but of course nothing will happen if
1296     it's a file.  Entries beginning with '/' are ignored as usual."""
1297     change = self.repos_mirror.change_path(path,
1298                                            [], [],
1299                                            self.add_dir,
1300                                            None, None,
1301                                            expected, 1)
1302     for ent in change.deleted_entries:
1303       self.dumpfile.write('Node-path: %s\n'
1304                           'Node-action: delete\n'
1305                           '\n' % (self.utf8_path(path + '/' + ent)))
1306
1307   def add_or_change_path(self, cvs_path, svn_path, cvs_rev, rcs_file,
1308                          tags, branches, ctx):
1309     # figure out the real file path for "co"
1310     try:
1311       f_st = os.stat(rcs_file)
1312     except os.error:
1313       dirname, fname = os.path.split(rcs_file)
1314       rcs_file = os.path.join(dirname, 'Attic', fname)
1315       f_st = os.stat(rcs_file)
1316
1317     # We begin with only a "CVS revision" property.
1318     if ctx.cvs_revnums:
1319       prop_contents = 'K 15\ncvs2svn:cvs-rev\nV %d\n%s\n' \
1320                       % (len(cvs_rev), cvs_rev)
1321     else:
1322       prop_contents = ''
1323
1324     # Check for executable-ness.
1325     if f_st[0] & stat.S_IXUSR:
1326       prop_contents = prop_contents + 'K 14\nsvn:executable\nV 1\n*\n'
1327
1328     # Set MIME type, and maybe eol-style for text files.
1329     if ctx.mime_mapper:
1330       mime_type = ctx.mime_mapper.get_type_from_filename(cvs_path)
1331       if mime_type:
1332         prop_contents = prop_contents + ('K 13\nsvn:mime-type\nV %d\n%s\n' % \
1333             (len(mime_type), mime_type))
1334         if ctx.set_eol_style and mime_type.startswith("text/"):
1335           prop_contents = prop_contents + 'K 13\nsvn:eol-style\nV 6\nnative\n'
1336
1337     # Calculate the property length (+10 for "PROPS-END\n")
1338     props_len = len(prop_contents) + 10
1339
1340     ### FIXME: We ought to notice the -kb flag set on the RCS file and
1341     ### use it to set svn:mime-type.
1342     ### (How this will interact with the mime-mapper code
1343     ### has yet to be decided.)
1344
1345     basename = os.path.basename(rcs_file[:-2])
1346     pipe_cmd = 'co -q -x,v -p%s %s' % (cvs_rev, escape_shell_arg(rcs_file))
1347     pipe = os.popen(pipe_cmd, PIPE_READ_MODE)
1348
1349     # You might think we could just test
1350     #
1351     #   if cvs_rev[-2:] == '.1':
1352     #
1353     # to determine if this path exists in head yet.  But that wouldn't
1354     # be perfectly reliable, both because of 'cvs commit -r', and also
1355     # the possibility of file resurrection.
1356     change = self.repos_mirror.change_path(svn_path, tags, branches,
1357                                            self.add_dir)
1358
1359     if change.op == OP_ADD:
1360       action = 'add'
1361     else:
1362       action = 'change'
1363
1364     self.dumpfile.write('Node-path: %s\n'
1365                         'Node-kind: file\n'
1366                         'Node-action: %s\n'
1367                         'Prop-content-length: %d\n'
1368                         'Text-content-length: '
1369                         % (self.utf8_path(svn_path), action, props_len))
1370
1371     pos = self.dumpfile.tell()
1372
1373     self.dumpfile.write('0000000000000000\n'
1374                         'Text-content-md5: 00000000000000000000000000000000\n'
1375                         'Content-length: 0000000000000000\n'
1376                         '\n')
1377
1378     self.dumpfile.write(prop_contents + 'PROPS-END\n')
1379
1380     # Insert the rev contents, calculating length and checksum as we go.
1381     checksum = md5.new()
1382     length = 0
1383     buf = pipe.read()
1384     while buf:
1385       checksum.update(buf)
1386       length = length + len(buf)
1387       self.dumpfile.write(buf)
1388       buf = pipe.read()
1389     if pipe.close() is not None:
1390       sys.exit('%s: Command failed: "%s"' % (error_prefix, pipe_cmd))
1391
1392     # Go back to patch up the length and checksum headers:
1393     self.dumpfile.seek(pos, 0)
1394     # We left 16 zeros for the text length; replace them with the real
1395     # length, padded on the left with spaces:
1396     self.dumpfile.write('%16d' % length)
1397     # 16... + 1 newline + len('Text-content-md5: ') == 35
1398     self.dumpfile.seek(pos + 35, 0)
1399     self.dumpfile.write(checksum.hexdigest())
1400     # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
1401     self.dumpfile.seek(pos + 84, 0)
1402     # The content length is the length of property data, text data,
1403     # and any metadata around/inside around them.
1404     self.dumpfile.write('%16d' % (length + props_len))
1405     # Jump back to the end of the stream
1406     self.dumpfile.seek(0, 2)
1407
1408     # This record is done (write two newlines -- one to terminate
1409     # contents that weren't themselves newline-termination, one to
1410     # provide a blank line for readability.
1411     self.dumpfile.write('\n\n')
1412     return change.closed_tags, change.closed_branches
1413
1414   def delete_path(self, svn_path, tags, branches, prune=None):
1415     """If SVN_PATH exists in the head mirror, output the deletion to
1416     the dumpfile, else output nothing to the dumpfile.
1417
1418     Return a tuple (path_deleted, closed_tags, closed_branches), where
1419     path_deleted is the path deleted if any or None if no deletion was
1420     necessary, and closed_tags and closed_names are lists of symbolic
1421     names closed off by this deletion -- that is, tags or branches
1422     which could be rooted in the previous revision of PATH, but not in
1423     this revision, because this rev changes PATH.  If path_deleted is
1424     None, then closed_tags and closed_branches will both be empty.
1425
1426     Iff PRUNE is true, then the path deleted can be not None, yet
1427     shorter than SVN_PATH because of pruning."""
1428     deleted_path, closed_tags, closed_branches \
1429                   = self.repos_mirror.delete_path(svn_path, tags,
1430                                                   branches, prune)
1431     if deleted_path:
1432       print "    (deleted '%s')" % deleted_path
1433       self.dumpfile.write('Node-path: %s\n'
1434                           'Node-action: delete\n'
1435                           '\n' % self.utf8_path(deleted_path))
1436     return deleted_path, closed_tags, closed_branches
1437
1438   def close(self):
1439     self.repos_mirror.close()
1440
1441     # If we're only making a dumpfile, we should be done now.  Just
1442     # close the dumpfile.  Otherwise, we're in "incremental" mode, and
1443     # we need to close our incremental dumpfile, flush it to the
1444     # repository, and then remove it.
1445     if self.dump_only:
1446       self.dumpfile.close()
1447     else:
1448       self.flush_and_remove_dumpfile()
1449       ret = self.loader_pipe.close()
1450       if ret:
1451         sys.stderr.write('%s: svnadmin load exited with error code %s' %
1452             (error_prefix, ret))
1453         sys.exit(1)
1454
1455
1456 def format_date(date):
1457   """Return an svn-compatible date string for DATE (seconds since epoch)."""
1458   # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
1459   return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", time.gmtime(date))
1460
1461
1462 def make_revision_props(ctx, symbolic_name, is_tag, date=None):
1463   """Return a dictionary of revision properties for the manufactured
1464   commit that finished SYMBOLIC_NAME.  If IS_TAG is true, write the
1465   log message as though for a tag, else as though for a branch.
1466   If DATE is passed, use it as the value of the svn:date property."""
1467   if is_tag:
1468     type = 'tag'
1469   else:
1470     type = 'branch'
1471
1472   # In Python 2.2.3, we could use textwrap.fill().  Oh well :-).
1473   if len(symbolic_name) >= 13:
1474     space_or_newline = '\n'
1475   else:
1476     space_or_newline = ' '
1477
1478   log = "This commit was manufactured by cvs2svn to create %s%s'%s'." \
1479         % (type, space_or_newline, symbolic_name)
1480
1481   return { 'svn:author' : ctx.username,
1482            'svn:log' : log,
1483            'svn:date' : date or format_date(time.time())}
1484
1485
1486 class SymbolicNameTracker:
1487   """Track the Subversion path/revision ranges of CVS symbolic names.
1488   This is done in a .db file, representing a tree in the usual way.
1489   In addition to directory entries, each object in the database stores
1490   the earliest revision from which it could be copied, and the first
1491   revision from which it could no longer be copied.  Intermediate
1492   directories go one step farther: they record counts for the various
1493   revisions from which items under them could have been copied, and
1494   counts for the cutoff revisions.  For example:
1495
1496                                .----------.
1497                                |  sub1    | [(2, 1), (3, 3)]
1498                                |  /       | [(5, 1), (17, 2), (50, 1)]
1499                                | /        |
1500                                |/ sub2    |
1501                                /    \     |
1502                               /|_____\____|
1503                              /        \
1504                       ______/          \_________
1505                      /                           \
1506                     /                             \
1507                    /                               \
1508               .---------.                     .---------.
1509               |  file1  |                     |  file3  |
1510               |   /     | [(3, 2)]            |     \   | [(2, 1), (3, 1)]
1511               |  /      | [(17, 1), (50, 1)]  |      \  | [(5, 1), (10, 1)]
1512               | /       |                     |       \ |
1513               |/ file2  |                     |  file4 \|
1514               /    \    |                     |    /    \
1515              /|_____\___|                     |___/_____|\
1516             /        \                           /        \
1517            /          \                         /          \
1518           /            \                       /            \
1519          /              +                     /              +
1520     +======+            |                 +======+           |
1521     |      | [(3, 1)]   |                 |      | [(2, 1)]  |
1522     |      | [(17, 1)]  |                 |      | [(5, 1)]  |
1523     |      |            |                 |      |           |
1524     +======+            |                 +======+           |
1525                     +======+                             +======+
1526                     |      | [(3, 1)]                    |      | [(3, 1)]
1527                     |      | [(50, 1)]                   |      | [(17, 1)]
1528                     |      |                             |      |
1529                     +======+                             +======+
1530
1531   The two lists to the right of each node represent the 'opening' and
1532   'closing' revisions respectively.  Each tuple in a list is of the
1533   form (REV, COUNT).  For leaf nodes, COUNT is always 1, of course.
1534   For intermediate nodes, the counts are the sums of the corresponding
1535   counts of child nodes.
1536
1537   These revision scores are used to determine the optimal copy
1538   revisions for each tree/subtree at branch or tag creation time.
1539
1540   The svn path input will most often be a trunk path, because the
1541   path/rev information recorded here is about where and when the given
1542   symbolic name could be rooted, *not* a path/rev for which commits
1543   along that symbolic name take place (of course, commits only happen on
1544   branches anyway)."""
1545
1546   def __init__(self):
1547     self.db_file = SYMBOLIC_NAMES_DB
1548     self.db = Database(self.db_file, 'n')
1549     self.root_key = gen_key()
1550     self.db[self.root_key] = {}
1551
1552     # The keys for the opening and closing revision lists attached to
1553     # each directory or file.  Includes "/" so as never to conflict
1554     # with any real entry.
1555     self.tags_opening_revs_key = "/tag-openings"
1556     self.tags_closing_revs_key = "/tag-closings"
1557     self.br_opening_revs_key   = "/br-openings"
1558     self.br_closing_revs_key   = "/br-closings"
1559
1560     # When a node is copied into the repository, the revision copied
1561     # is stored under the appropriate key, and the corresponding
1562     # opening and closing rev lists are removed.
1563     self.tags_copyfrom_rev_key = "/tags-copyfrom-rev"
1564     self.br_copyfrom_rev_key = "/br-copyfrom-rev"
1565
1566   def probe_path(self, symbolic_name, path, debugging=None):
1567     """If 'SYMBOLIC_NAME/PATH' exists in the symbolic name tree,
1568     return the value of its last component, else return None.
1569     PATH may be None, but may not start with '/'.
1570     If DEBUGGING is true, then print trace output to stdout."""
1571     if path:
1572       components = [symbolic_name] + string.split(path, '/')
1573     else:
1574       components = [symbolic_name]
1575
1576     if debugging:
1577       print "PROBING SYMBOLIC NAME:\n", components
1578
1579     parent_key = self.root_key
1580     parent = self.db[parent_key]
1581     last_component = "/"
1582     i = 1
1583     for component in components:
1584       if debugging:
1585         print "  " * i,
1586         print "'%s' key: %s, val:" % (last_component, parent_key), parent
1587
1588       # Check for a "can't happen."
1589       if not parent.has_key(component):
1590         sys.stderr.write("%s: sym probe failed: '%s' does not contain '%s'\n"
1591                          % (error_prefix, last_component, component))
1592         sys.exit(1)
1593
1594       this_entry_key = parent[component]
1595       this_entry_val = self.db[this_entry_key]
1596       parent_key = this_entry_key
1597       parent = this_entry_val
1598       last_component = component
1599       i = i + 1
1600
1601     if debugging:
1602       print "  " * i,
1603       print "parent_key: %s, val:" % parent_key, parent
1604
1605     # It's not actually a parent at this point, it's the leaf node.
1606     return parent
1607
1608   def bump_rev_count(self, item_key, rev, revlist_key):
1609     """Increment REV's count in opening or closing list under KEY.
1610     REVLIST_KEY is self.*_opening_revs_key or self.*_closing_revs_key,
1611     and indicates which rev list to increment REV's count in.
1612
1613     For example, if REV is 7, REVLIST_KEY is
1614     self.tags_opening_revs_key, and the entry's tags opening revs list
1615     looks like this
1616
1617          [(2, 5), (7, 2), (10, 15)]
1618
1619     then afterwards it would look like this:
1620
1621          [(2, 5), (7, 3), (10, 15)]
1622
1623     But if no tuple for revision 7 were present, then one would be
1624     added, for example
1625
1626          [(2, 5), (10, 15)]
1627
1628     would become
1629
1630          [(2, 5), (7, 1), (10, 15)]
1631
1632     The list is sorted by ascending revision both before and after."""
1633
1634     entry_val = self.db[item_key]
1635
1636     if not entry_val.has_key(revlist_key):
1637       entry_val[revlist_key] = [(rev, 1)]
1638     else:
1639       rev_counts = entry_val[revlist_key]
1640       for i in range(len(rev_counts)):
1641         this_rev, this_count = rev_counts[i]
1642         if rev == this_rev:
1643           rev_counts[i] = (this_rev, this_count + 1)
1644           break
1645         elif this_rev > rev:
1646           if i > 0:
1647             i = i - 1
1648           rev_counts.insert(i, (rev, 1))
1649           break
1650       else:
1651         rev_counts.append((rev, 1))
1652       entry_val[revlist_key] = rev_counts
1653
1654     self.db[item_key] = entry_val
1655
1656   # The verb form of "root" is "root", but that would be misleading in
1657   # this case; and the opposite of "uproot" is presumably "downroot",
1658   # but that wouldn't exactly clarify either.  Hence, "enroot" :-).
1659   def enroot_names(self, svn_path, svn_rev, names, opening_key):
1660     """Record SVN_PATH at SVN_REV as the earliest point from which the
1661     symbolic names in NAMES could be copied.  OPENING_KEY is
1662     self.tags_opening_revs_key or self.br_opening_revs_key, to
1663     indicate whether NAMES contains tag names or branch names.
1664     SVN_PATH does not start with '/'."""
1665
1666     # Guard against names == None
1667     if not names:
1668       return
1669
1670     for name in names:
1671       components = [name] + string.split(svn_path, '/')
1672       parent_key = self.root_key
1673       for component in components:
1674         self.bump_rev_count(parent_key, svn_rev, opening_key)
1675         parent = self.db[parent_key]
1676         if not parent.has_key(component):
1677           new_child_key = gen_key()
1678           parent[component] = new_child_key
1679           self.db[new_child_key] = {}
1680           self.db[parent_key] = parent
1681         # One way or another, parent now has an entry for component.
1682         this_entry_key = parent[component]
1683         this_entry_val = self.db[this_entry_key]
1684         # Swaparoo.
1685         parent_key = this_entry_key
1686         parent = this_entry_val
1687
1688       self.bump_rev_count(parent_key, svn_rev, opening_key)
1689
1690   def enroot_tags(self, svn_path, svn_rev, tags):
1691     """Record SVN_PATH at SVN_REV as the earliest point from which the
1692     symbolic names in TAGS could be copied.  SVN_PATH does not start
1693     with '/'."""
1694     self.enroot_names(svn_path, svn_rev, tags, self.tags_opening_revs_key)
1695
1696   def enroot_branches(self, svn_path, svn_rev, branches):
1697     """Record SVN_PATH at SVN_REV as the earliest point from which the
1698     symbolic names in BRANCHES could be copied.  SVN_PATH does not
1699     start with '/'."""
1700     self.enroot_names(svn_path, svn_rev, branches, self.br_opening_revs_key)
1701
1702   def close_names(self, svn_path, svn_rev, names, closing_key):
1703     """Record that as of SVN_REV, SVN_PATH could no longer be the
1704     source from which any of symbolic names in NAMES could be copied.
1705     CLOSING_KEY is self.tags_closing_revs_key or
1706     self.br_closing_revs_key, to indicate whether NAMES are tags or
1707     branches.  SVN_PATH does not start with '/'."""
1708
1709     # Guard against names == None
1710     if not names:
1711       return
1712
1713     for name in names:
1714       components = [name] + string.split(svn_path, '/')
1715       parent_key = self.root_key
1716       for component in components:
1717         self.bump_rev_count(parent_key, svn_rev, closing_key)
1718         parent = self.db[parent_key]
1719         # Check for a "can't happen".
1720         if not parent.has_key(component):
1721           sys.stderr.write("%s: in path '%s', value for parent key '%s' "
1722                            "does not have entry '%s'\n"
1723                            % (error_prefix, svn_path, parent_key, component))
1724           sys.exit(1)
1725         this_entry_key = parent[component]
1726         this_entry_val = self.db[this_entry_key]
1727         # Swaparoo.
1728         parent_key = this_entry_key
1729         parent = this_entry_val
1730
1731       self.bump_rev_count(parent_key, svn_rev, closing_key)
1732
1733   def close_tags(self, svn_path, svn_rev, tags):
1734     """Record that as of SVN_REV, SVN_PATH could no longer be the
1735     source from which any of TAGS could be copied.  SVN_PATH does not
1736     start with '/'."""
1737     self.close_names(svn_path, svn_rev, tags, self.tags_closing_revs_key)
1738
1739   def close_branches(self, svn_path, svn_rev, branches):
1740     """Record that as of SVN_REV, SVN_PATH could no longer be the
1741     source from which any of BRANCHES could be copied.  SVN_PATH does
1742     not start with '/'."""
1743     self.close_names(svn_path, svn_rev, branches, self.br_closing_revs_key)
1744
1745   def score_revisions(self, openings, closings):
1746     """Return a list of revisions and scores based on OPENINGS and
1747     CLOSINGS.  The returned list looks like:
1748
1749        [(REV1 SCORE1), (REV2 SCORE2), ...]
1750
1751     where REV2 > REV1.  OPENINGS and CLOSINGS are the values of
1752     self.tags_opening_revs_key and self.tags_closing_revs_key, or
1753     self.br_opening_revs_key and self.br_closing_revs_key, from some file or
1754     directory node, or else None.
1755
1756     Each score indicates that copying the corresponding revision (or any
1757     following revision up to the next revision in the list) of
1758     the object in question would yield that many correct paths at or
1759     underneath the object.  There may be other paths underneath it
1760     which are not correct and need to be deleted or recopied; those
1761     can only be detected by descending and examining their scores.
1762
1763     If OPENINGS is false, return the empty list."""
1764
1765     # First look for easy outs.
1766     if not openings:
1767       return []
1768
1769     # Must be able to call len(closings) below.
1770     if closings is None:
1771       closings = []
1772
1773     # No easy out, so wish for lexical closures and calculate the scores :-).
1774     scores = []
1775     opening_score_accum = 0
1776     for i in range(len(openings)):
1777       opening_rev, opening_score = openings[i]
1778       opening_score_accum = opening_score_accum + opening_score
1779       scores.append((opening_rev, opening_score_accum))
1780     min = 0
1781     for i in range(len(closings)):
1782       closing_rev, closing_score = closings[i]
1783       done_exact_rev = None
1784       insert_index = None
1785       insert_score = None
1786       for j in range(min, len(scores)):
1787         score_rev, score = scores[j]
1788         if score_rev >= closing_rev:
1789           if not done_exact_rev:
1790             if score_rev > closing_rev:
1791               insert_index = j
1792               insert_score = scores[j-1][1] - closing_score
1793             done_exact_rev = 1
1794           scores[j] = (score_rev, score - closing_score)
1795         else:
1796           min = j + 1
1797       if not done_exact_rev:
1798         scores.append((closing_rev,scores[-1][1] - closing_score))
1799       if insert_index is not None:
1800         scores.insert(insert_index, (closing_rev, insert_score))
1801     return scores
1802
1803   def best_rev(self, scores, prefer_rev, limit_rev):
1804     """Return the revision older than LIMIT_REV with the highest score
1805     from SCORES, a list returned by score_revisions(). When the maximum score
1806     is shared by multiple revisions, the oldest revision is selected, unless
1807     PREFER_REV is one of the possibilities, in which case, it is selected."""
1808     max_score = 0
1809     prefer_rev_score = -1
1810     rev = SVN_INVALID_REVNUM
1811     for pair in scores:
1812       if pair[1] > max_score and pair[0] < limit_rev:
1813         max_score = pair[1]
1814         rev = pair[0]
1815       if pair[0] <= prefer_rev:
1816         prefer_rev_score = pair[1]
1817     if prefer_rev_score == max_score:
1818       rev = prefer_rev
1819     return rev
1820
1821   def is_best_rev(self, scores, rev, limit_rev):
1822     """Return true if REV has the highest score for revisions older than
1823     LIMIT_REV from SCORES, a list returned by score_revisions()."""
1824     return self.best_rev(scores, rev, limit_rev) == rev
1825
1826   # Helper for copy_descend().
1827   def cleanup_entries(self, rev, limit_rev, entries, is_tag):
1828     """Return a copy of ENTRIES, minus the individual entries whose
1829     highest scoring revision doesn't match REV (and also, minus and
1830     special '/'-denoted flags).  IS_TAG is 1 or None, based on whether
1831     this work is being done for the sake of a tag or a branch."""
1832     if is_tag:
1833       opening_key = self.tags_opening_revs_key
1834       closing_key = self.tags_closing_revs_key
1835     else:
1836       opening_key = self.br_opening_revs_key
1837       closing_key = self.br_closing_revs_key
1838
1839     new_entries = {}
1840     for key in entries.keys():
1841       if key[0] == '/': # Skip flags
1842         continue
1843       entry = entries.get(key)
1844       val = self.db[entry]
1845       scores = self.score_revisions(val.get(opening_key), val.get(closing_key))
1846       if self.is_best_rev(scores, rev, limit_rev):
1847         new_entries[key] = entry
1848     return new_entries
1849
1850   # Helper for fill_branch().
1851   def copy_descend(self, dumper, ctx, name, parent, entry_name,
1852                    parent_rev, src_path, dst_path, is_tag, jit_new_rev=None):
1853     """Starting with ENTRY_NAME in directory object PARENT at
1854     PARENT_REV, use DUMPER and CTX to copy nodes in the Subversion
1855     repository, manufacturing the source paths with SRC_PATH and the
1856     destination paths with NAME and DST_PATH.
1857
1858     If IS_TAG is true, NAME is treated as a tag, else as a branch.
1859
1860     If JIT_NEW_REV is not None, it is a list of one or two elements.
1861     If the first element is true, then if any copies are to be made,
1862     invoke DUMPER.start_revision() before the first copy, then set
1863     JIT_NEW_REV[0] to None, so no more new revisions are made for this
1864     symbolic name anywhere in this descent.
1865
1866     The second element, if present, is the string to be used for the svn:date
1867     property of any JIT-created revision.
1868
1869     ('JIT' == 'Just In Time'.)"""
1870     ### Hmmm, is passing [1] instead of 1 an idiomatic way of passing
1871     ### a side-effectable boolean in Python?  That's how the
1872     ### JIT_NEW_REV parameter works here and elsewhere, but maybe
1873     ### there's a clearer way to do it?
1874
1875     key = parent[entry_name]
1876     val = self.db[key]
1877
1878     if is_tag:
1879       opening_key = self.tags_opening_revs_key
1880       closing_key = self.tags_closing_revs_key
1881       copyfrom_rev_key = self.tags_copyfrom_rev_key
1882     else:
1883       opening_key = self.br_opening_revs_key
1884       closing_key = self.br_closing_revs_key
1885       copyfrom_rev_key = self.br_copyfrom_rev_key
1886
1887     limit_rev = dumper.revision
1888     if jit_new_rev and jit_new_rev[0]:
1889       # Because in this case the current rev is complete,
1890       # so is a valid copyfrom source
1891       limit_rev = limit_rev + 1
1892
1893     if not val.has_key(copyfrom_rev_key):
1894       # If not already copied this subdir, calculate its "best rev"
1895       # and see if it differs from parent's best rev.
1896       scores = self.score_revisions(val.get(opening_key), val.get(closing_key))
1897       rev = self.best_rev(scores, parent_rev, limit_rev)
1898
1899       if rev == SVN_INVALID_REVNUM:
1900         return  # name is a branch, but we're doing a tag, or vice versa
1901
1902       else:
1903         if is_tag:
1904           copy_dst = make_path(ctx, dst_path, None, name)
1905         else:
1906           copy_dst = make_path(ctx, dst_path, name, None)
1907
1908         expected_entries = self.cleanup_entries(rev, limit_rev,
1909                                                 val, is_tag)
1910         if (rev != parent_rev):
1911           if jit_new_rev and jit_new_rev[0]:
1912             dumper.start_revision(make_revision_props(ctx, name, is_tag,
1913               len(jit_new_rev) > 1 and jit_new_rev[1] or None))
1914             jit_new_rev[0] = None
1915           if dumper.copy_path(src_path, rev, copy_dst, expected_entries):
1916             parent_rev = rev
1917           else:
1918             # If we didn't copy, then we need to prune
1919             dumper.prune_entries(copy_dst, expected_entries)
1920         else:
1921           # Even if we kept the already-present revision of this entry
1922           # instead of copying a new one, we still need to prune out
1923           # anything that's not part of the symbolic name.
1924           dumper.prune_entries(copy_dst, expected_entries)
1925
1926         # Record that this copy is done:
1927         val[copyfrom_rev_key] = parent_rev
1928         if val.has_key(opening_key):
1929           del val[opening_key]
1930         if val.has_key(closing_key):
1931           del val[closing_key]
1932         self.db[key] = val
1933
1934     for ent in val.keys():
1935       if not ent[0] == '/':
1936         if src_path:
1937           next_src = src_path + '/' + ent
1938         else:
1939           next_src = ent
1940         if dst_path:
1941           next_dst = dst_path + '/' + ent
1942         else:
1943           next_dst = ent
1944         self.copy_descend(dumper, ctx, name, val, ent, parent_rev,
1945                           next_src, next_dst, is_tag, jit_new_rev)
1946
1947   def fill_name(self, dumper, ctx, name, is_tag, jit_new_rev=None):
1948     """Use DUMPER to create all currently available parts of symbolic
1949     name NAME that have not been created already.
1950
1951     If IS_TAG is true, NAME is treated as a tag, else as a branch.
1952
1953     JIT_NEW_REV is as documented for the copy_descend() function."""
1954
1955     # A source path looks like this in the symbolic name tree:
1956     #
1957     #    thisbranch/trunk/proj/foo/bar/baz.c
1958     #
1959     # ...or occasionally...
1960     #
1961     #    thisbranch/branches/sourcebranch/proj/foo/bar/baz.c
1962     #
1963     # (the latter when 'thisbranch' is branched off 'sourcebranch').
1964     #
1965     # Meanwhile, we're copying to a location in the repository like
1966     #
1967     #    /branches/thisbranch/proj/foo/bar/baz.c    or
1968     #    /tags/tagname/proj/foo/bar/baz.c
1969     #
1970     # Of course all this depends on make_path()'s behavior.  At
1971     # various times we've changed the way it produces paths (see
1972     # revisions 6028 and 6347).  If it changes again, the logic here
1973     # must be adjusted to match.
1974
1975     parent_key = self.root_key
1976     parent = self.db[parent_key]
1977
1978     # If there are no origin records, then we must've messed up earlier.
1979     if not parent.has_key(name):
1980       if is_tag:
1981         sys.stderr.write("%s: no origin records for tag '%s'.\n"
1982                          % (error_prefix, name))
1983       else:
1984         sys.stderr.write("%s: no origin records for branch '%s'.\n"
1985                          % (error_prefix, name))
1986       sys.exit(1)
1987
1988     parent_key = parent[name]
1989     parent = self.db[parent_key]
1990
1991     # All Subversion source paths under the branch start with one of
1992     # three things:
1993     #
1994     #   /trunk/...
1995     #   /branches/foo/...
1996     #   /tags/foo/...
1997     #
1998     # (We don't care what foo is, it's just a component to skip over.)
1999     #
2000     # Since these don't all have the same number of components, we
2001     # manually descend into each as far as necessary, then invoke
2002     # copy_descend() once we're in the right place in both trees.
2003     #
2004     # Since it's possible for a branch or tag to have some source
2005     # paths on trunk and some on branches, there's some question about
2006     # what to copy as the top-level directory of the branch.  Our
2007     # solution is to [somewhat randomly] give preference to trunk.
2008     # Note that none of these paths can ever conflict; for example,
2009     # it would be impossible to have both
2010     #
2011     #   thisbranch/trunk/myproj/lib/drivers.c                   and
2012     #   thisbranch/branches/sourcebranch/myproj/lib/drivers.c
2013     #
2014     # because that would imply that the symbolic name 'thisbranch'
2015     # appeared twice in the RCS file header, referring to two
2016     # different revisions.  Well, I suppose that's *possible*, but its
2017     # effect is undefined, and it's as reasonable for us to just
2018     # overwrite one with the other as anything else -- anyway, isn't
2019     # that what CVS would do if you checked out the branch?  <shrug>
2020
2021     if parent.has_key(ctx.trunk_base):
2022       self.copy_descend(dumper, ctx, name, parent, ctx.trunk_base,
2023                         SVN_INVALID_REVNUM, ctx.trunk_base, "",
2024                         is_tag, jit_new_rev)
2025     if parent.has_key(ctx.branches_base):
2026       branch_base_key = parent[ctx.branches_base]
2027       branch_base = self.db[branch_base_key]
2028       for this_source in branch_base.keys():
2029         # We skip special names beginning with '/' for the usual
2030         # reason.  We skip cases where (this_source == name) for a
2031         # different reason: if a CVS branch were rooted in itself,
2032         # that would imply that the same symbolic name appeared on two
2033         # different branches in an RCS file, which CVS doesn't
2034         # permit.  So while it wouldn't hurt to descend, it would be a
2035         # waste of time.
2036         if (this_source[0] != '/') and (this_source != name):
2037           src_path = ctx.branches_base + '/' + this_source
2038           self.copy_descend(dumper, ctx, name, branch_base, this_source,
2039                             SVN_INVALID_REVNUM, src_path, "",
2040                             is_tag, jit_new_rev)
2041
2042   def fill_tag(self, dumper, ctx, tag, jit_new_rev=None):
2043     """Use DUMPER to create all currently available parts of TAG that
2044     have not been created already.  Use CTX.trunk_base, CTX.tags_base,
2045     and CTX.branches_base to determine the source and destination
2046     paths in the Subversion repository.
2047
2048     JIT_NEW_REV is as documented for the copy_descend() function."""
2049     self.fill_name(dumper, ctx, tag, 1, jit_new_rev)
2050
2051   def fill_branch(self, dumper, ctx, branch, jit_new_rev=None):
2052     """Use DUMPER to create all currently available parts of BRANCH that
2053     haven't been created already.  Use CTX.trunk_base, CTX.tags_base,
2054     and CTX.branches_base to determine the source and destination
2055     paths in the Subversion repository.
2056
2057     JIT_NEW_REV is as documented for the copy_descend() function."""
2058     self.fill_name(dumper, ctx, branch, None, jit_new_rev)
2059
2060   def finish(self, dumper, ctx):
2061     """Use DUMPER to finish branches and tags that have either
2062     not been created yet, or have been only partially created.
2063     Use CTX.trunk_base, CTX.tags_base, and CTX.branches_base to
2064     determine the source and destination paths in the Subversion
2065     repository."""
2066     parent_key = self.root_key
2067     parent = self.db[parent_key]
2068     # Do all branches first, then all tags.  We don't bother to check
2069     # here whether a given name is a branch or a tag, or is done
2070     # already; the fill_foo() methods will just do nothing if there's
2071     # nothing to do.
2072     #
2073     # We do one revision per branch or tag, for clarity to users, not
2074     # for correctness.  In CVS, when you make a branch off a branch,
2075     # the new branch will just root itself in the roots of the old
2076     # branch *except* where the new branch sprouts from a revision
2077     # that was actually committed on the old branch.  In the former
2078     # cases, the source paths will be the same as the source paths
2079     # from which the old branch was created and therefore will already
2080     # exist; and in the latter case, the source paths will actually be
2081     # on the old branch, but those paths will exist already because
2082     # they were commits on that branch and therefore cvs2svn must have
2083     # created it already (see the fill_branch call in Commit.commit).
2084     # So either way, the source paths exist by the time we need them.
2085     #
2086     ### It wouldn't be so awfully hard to determine whether a name is
2087     ### just a branch or just a tag, which would allow for more
2088     ### intuitive messages below.
2089     if not ctx.trunk_only:
2090       print "Finishing branches:"
2091       for name in parent.keys():
2092         if name[0] != '/':
2093           print "finishing '%s' as branch" % name
2094           self.fill_branch(dumper, ctx, name, [1])
2095       print "Finishing tags:"
2096       for name in parent.keys():
2097         if name[0] != '/':
2098           print "finishing '%s' as tag" % name
2099           self.fill_tag(dumper, ctx, name, [1])
2100
2101
2102 def is_trunk_vendor_revision(default_branches_db, cvs_path, cvs_rev):
2103   """Return 1 if CVS_REV of CVS_PATH is a trunk (i.e., head) vendor
2104   revision according to DEFAULT_BRANCHES_DB, else return None."""
2105   if default_branches_db.has_key(cvs_path):
2106     val = default_branches_db[cvs_path]
2107     val_last_dot = val.rindex(".")
2108     received_last_dot = cvs_rev.rindex(".")
2109     default_branch = val[:val_last_dot]
2110     received_branch = cvs_rev[:received_last_dot]
2111     default_rev_component = int(val[val_last_dot + 1:])
2112     received_rev_component = int(cvs_rev[received_last_dot + 1:])
2113     if (default_branch == received_branch
2114         and received_rev_component <= default_rev_component):
2115       return 1
2116   # else
2117   return None
2118
2119
2120 class Commit:
2121   def __init__(self, author, log):
2122     self.author = author
2123     self.log = log
2124
2125     self.files = { }
2126
2127     # For consistency, the elements of both lists are of the form
2128     #
2129     #   (file, rev, deltatext_code, branch_name, tags, branches)
2130     #
2131     # even though self.deletes doesn't use the deltatext_code.
2132     self.changes = [ ]
2133     self.deletes = [ ]
2134
2135     # Start out with a t_min higher than any incoming time T, and a
2136     # t_max lower than any incoming T.  This way the first T will
2137     # push t_min down to T, and t_max up to T, naturally (without any
2138     # special-casing), and successive times will then ratchet them
2139     # outward as appropriate.
2140     self.t_min = 1L<<32
2141     self.t_max = 0
2142
2143   def __cmp__(self, other):
2144     # Commits should be sorted by t_max.  If both self and other have
2145     # the same t_max, break the tie using t_min.
2146     return cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
2147
2148   def has_file(self, fname):
2149     return self.files.has_key(fname)
2150
2151   def add(self, t, op, file, rev, deltatext_code, branch_name, tags, branches):
2152     # Record the time range of this commit.
2153     #
2154     # ### ISSUE: It's possible, though unlikely, that the time range
2155     # of a commit could get gradually expanded to be arbitrarily
2156     # longer than COMMIT_THRESHOLD.  I'm not sure this is a huge
2157     # problem, and anyway deciding where to break it up would be a
2158     # judgement call. For now, we just print a warning in commit() if
2159     # this happens.
2160     if t < self.t_min:
2161       self.t_min = t
2162     if t > self.t_max:
2163       self.t_max = t
2164
2165     if op == OP_CHANGE:
2166       self.changes.append((file, rev, deltatext_code, branch_name,
2167                            tags, branches))
2168     else:
2169       # OP_DELETE
2170       self.deletes.append((file, rev, deltatext_code, branch_name,
2171                            tags, branches))
2172     self.files[file] = 1
2173
2174   def commit(self, dumper, ctx, sym_tracker):
2175     # commit this transaction
2176     seconds = self.t_max - self.t_min
2177     print 'committing: %s, over %d seconds' % (time.ctime(self.t_min), seconds)
2178     if seconds > COMMIT_THRESHOLD:
2179       print '%s: commit spans more than %d seconds' \
2180             % (warning_prefix, COMMIT_THRESHOLD)
2181
2182     if ctx.dry_run:
2183       for f, r, dt_code, br, tags, branches in self.changes:
2184         # compute a repository path, dropping the ,v from the file name
2185         svn_path = make_path(ctx, relative_name(ctx.cvsroot, f[:-2]), br)
2186         print "    adding or changing '%s' : '%s'" % (r, svn_path)
2187       for f, r, dt_code, br, tags, branches in self.deletes:
2188         # compute a repository path, dropping the ,v from the file name
2189         svn_path = make_path(ctx, relative_name(ctx.cvsroot, f[:-2]), br)
2190         print "    deleting '%s' : '%s'" % (r, svn_path)
2191       print '    (skipped; dry run enabled)'
2192       return
2193
2194     do_copies = [ ]
2195
2196     # State for handling default branches.
2197     #
2198     # Here is a tempting, but ultimately nugatory, bit of logic, which
2199     # I share with you so you may appreciate the less attractive, but
2200     # refreshingly non-nugatory, logic which follows it:
2201     #
2202     # If some of the commits in this txn happened on a non-trunk
2203     # default branch, then those files will have to be copied into
2204     # trunk manually after being changed on the branch (because the
2205     # RCS "default branch" appears as head, i.e., trunk, in practice).
2206     # As long as those copies don't overwrite any trunk paths that
2207     # were also changed in this commit, then we can do the copies in
2208     # the same revision, because they won't cover changes that don't
2209     # appear anywhere/anywhen else.  However, if some of the trunk dst
2210     # paths *did* change in this commit, then immediately copying the
2211     # branch changes would lose those trunk mods forever.  So in this
2212     # case, we need to do at least that copy in its own revision.  And
2213     # for simplicity's sake, if we're creating the new revision for
2214     # even one file, then we just do all such copies together in the
2215     # new revision.
2216     #
2217     # Doesn't that sound nice?
2218     #
2219     # Unfortunately, Subversion doesn't support copies with sources
2220     # in the current txn.  All copies must be based in committed
2221     # revisions.  Therefore, we generate the above-described new
2222     # revision unconditionally.
2223     #
2224     # Each of these is a list of tuples.  Each tuple is of the form:
2225     #
2226     #   (cvs_path, branch_name, tags_rooted_here, branches_rooted_here)
2227     #
2228     # and a tuple is created for each default branch commit that will
2229     # need to be copied to trunk (or deleted from trunk) in the
2230     # generated revision following the "regular" revision.
2231     default_branch_copies  = [ ]
2232     default_branch_deletes = [ ]
2233
2234     # we already have the date, so just format it
2235     date = format_date(self.t_max)
2236     try:
2237       ### FIXME: The 'replace' behavior should be an option, like
2238       ### --encoding is.
2239       unicode_author = unicode(self.author, ctx.encoding, 'replace')
2240       unicode_log = unicode(self.log, ctx.encoding, 'replace')
2241       props = { 'svn:author' : unicode_author.encode('utf8'),
2242                 'svn:log' : unicode_log.encode('utf8'),
2243                 'svn:date' : date }
2244     except UnicodeError:
2245       print '%s: problem encoding author or log message:' % warning_prefix
2246       print "  author: '%s'" % self.author
2247       print "  log:    '%s'" % self.log
2248       print "  date:   '%s'" % date
2249       for rcs_file, cvs_rev, dt_code, br, tags, branches in self.changes:
2250         print "    rev %s of '%s'" % (cvs_rev, rcs_file)
2251       print "Consider rerunning with (for example) '--encoding=latin1'."
2252       # Just fall back to the original data.
2253       props = { 'svn:author' : self.author,
2254                 'svn:log' : self.log,
2255                 'svn:date' : date }
2256
2257
2258     # Tells whether we actually wrote anything to the dumpfile.
2259     svn_rev = SVN_INVALID_REVNUM
2260
2261     # If any of the changes we are about to do are on branches, we need to
2262     # check and maybe fill them (in their own revisions) *before* we start
2263     # then data revision. So we have to iterate over changes and deletes twice.
2264     for rcs_file, cvs_rev, dt_code, br, tags, branches in self.changes:
2265       # compute a repository path, dropping the ,v from the file name
2266       cvs_path = relative_name(ctx.cvsroot, rcs_file[:-2])
2267       svn_path = make_path(ctx, cvs_path, br)
2268       if br:
2269         ### FIXME: Here is an obvious optimization point.  Probably
2270         ### dump.probe_path(PATH) is kind of slow, because it does N
2271         ### database lookups for the N components in PATH.  If this
2272         ### turns out to be a performance bottleneck, we can just
2273         ### maintain a database mirroring just the head tree, but
2274         ### keyed on full paths, to reduce the check to a quick
2275         ### constant time query.
2276         if not dumper.probe_path(svn_path):
2277           sym_tracker.fill_branch(dumper, ctx, br, [1, date])
2278
2279     for rcs_file, cvs_rev, dt_code, br, tags, branches in self.deletes:
2280       # compute a repository path, dropping the ,v from the file name
2281       cvs_path = relative_name(ctx.cvsroot, rcs_file[:-2])
2282       svn_path = make_path(ctx, cvs_path, br)
2283       if br:
2284         ### FIXME: Here is an obvious optimization point.  Probably
2285         ### dump.probe_path(PATH) is kind of slow, because it does N
2286         ### database lookups for the N components in PATH.  If this
2287         ### turns out to be a performance bottleneck, we can just
2288         ### maintain a database mirroring just the head tree, but
2289         ### keyed on full paths, to reduce the check to a quick
2290         ### constant time query.
2291         if not dumper.probe_path(svn_path):
2292           sym_tracker.fill_branch(dumper, ctx, br, [1, date])
2293
2294
2295     # Now that any branches we need exist, we can do the commits.
2296     for rcs_file, cvs_rev, dt_code, br, tags, branches in self.changes:
2297       # compute a repository path, dropping the ,v from the file name
2298       cvs_path = relative_name(ctx.cvsroot, rcs_file[:-2])
2299       svn_path = make_path(ctx, cvs_path, br)
2300       if svn_rev == SVN_INVALID_REVNUM:
2301         svn_rev = dumper.start_revision(props)
2302       sym_tracker.enroot_tags(svn_path, svn_rev, tags)
2303       sym_tracker.enroot_branches(svn_path, svn_rev, branches)
2304       print "    adding or changing %s : '%s'" % (cvs_rev, svn_path)
2305
2306       # Only make a change if we need to.  When 1.1.1.1 has an empty
2307       # deltatext, the explanation is almost always that we're looking
2308       # at an imported file whose 1.1 and 1.1.1.1 are identical.  On
2309       # such imports, CVS creates an RCS file where 1.1 has the
2310       # content, and 1.1.1.1 has an empty deltatext, i.e, the same
2311       # content as 1.1.  There's no reason to reflect this non-change
2312       # in the repository, so we want to do nothing in this case.  (If
2313       # we were really paranoid, we could make sure 1.1's log message
2314       # is the CVS-generated "Initial revision\n", but I think the
2315       # conditions below are strict enough.)
2316       if not ((dt_code == DELTATEXT_EMPTY) and (cvs_rev == "1.1.1.1")
2317               and dumper.probe_path(svn_path)):
2318         closed_tags, closed_branches = \
2319                      dumper.add_or_change_path(cvs_path,
2320                                                svn_path,
2321                                                cvs_rev,
2322                                                rcs_file,
2323                                                tags,
2324                                                branches,
2325                                                ctx)
2326         if is_trunk_vendor_revision(ctx.default_branches_db,
2327                                     cvs_path, cvs_rev):
2328           default_branch_copies.append((cvs_path, br, tags, branches))
2329         sym_tracker.close_tags(svn_path, svn_rev, closed_tags)
2330         sym_tracker.close_branches(svn_path, svn_rev, closed_branches)
2331
2332     for rcs_file, cvs_rev, dt_code, br, tags, branches in self.deletes:
2333       # compute a repository path, dropping the ,v from the file name
2334       cvs_path = relative_name(ctx.cvsroot, rcs_file[:-2])
2335       svn_path = make_path(ctx, cvs_path, br)
2336       print "    deleting %s : '%s'" % (cvs_rev, svn_path)
2337       if svn_rev == SVN_INVALID_REVNUM:
2338         svn_rev = dumper.start_revision(props)
2339       # Uh, can this even happen on a deleted path?  Hmmm.  If not,
2340       # there's no risk, since tags and branches would just be empty
2341       # and therefore enrooting would be a no-op.  Still, it would
2342       # be clearer to know for sure and simply not call it.
2343       sym_tracker.enroot_tags(svn_path, svn_rev, tags)
2344       sym_tracker.enroot_branches(svn_path, svn_rev, branches)
2345       ### FIXME: this will return path_deleted == None if no path
2346       ### was deleted.  But we'll already have started the revision
2347       ### by then, so it's a bit late to use the knowledge!  Need to
2348       ### reorganize things so that starting the revision is a
2349       ### callback with its own internal conditional, so anyone can
2350       ### just invoke when they know they're really about to do
2351       ### something.
2352       ###
2353       ### Right now what happens is we get an empty revision
2354       ### (assuming nothing else happened in this revision).
2355       path_deleted, closed_tags, closed_branches = \
2356                     dumper.delete_path(svn_path, tags, branches, ctx.prune)
2357       if is_trunk_vendor_revision(ctx.default_branches_db, cvs_path, cvs_rev):
2358         default_branch_deletes.append((cvs_path, br, tags, branches))
2359       sym_tracker.close_tags(svn_path, svn_rev, closed_tags)
2360       sym_tracker.close_branches(svn_path, svn_rev, closed_branches)
2361
2362     if svn_rev == SVN_INVALID_REVNUM:
2363       print '    no new revision created, as nothing to do'
2364     else:
2365       print '    new revision:', svn_rev
2366       if default_branch_copies or default_branch_deletes:
2367         previous_rev = svn_rev
2368         msg = 'This commit was generated by cvs2svn to compensate for '     \
2369               'changes in r%d,\n'                                           \
2370               'which included commits to RCS files with non-trunk default ' \
2371               'branches.\n' % previous_rev
2372         props = { 'svn:author' : 'cvs2svn',
2373                   'svn:log' : msg,
2374                   'svn:date' : date }
2375         svn_rev = dumper.start_revision(props)
2376
2377         for cvs_path, br, tags, branches in default_branch_copies:
2378           src_path = make_path(ctx, cvs_path, br)
2379           dst_path = make_path(ctx, cvs_path)
2380           if (dumper.probe_path(dst_path)):
2381             ign, closed_tags, closed_branches = \
2382                  dumper.delete_path(dst_path, tags, branches, ctx.prune)
2383             sym_tracker.close_tags(dst_path, svn_rev, closed_tags)
2384             sym_tracker.close_branches(dst_path, svn_rev, closed_branches)
2385           dumper.copy_path(src_path, previous_rev, dst_path)
2386
2387         for cvs_path, br, tags, branches in default_branch_deletes:
2388           # Ignore the branch -- we don't need to know the default
2389           # branch, we already know we're deleting this from trunk.
2390           dst_path = make_path(ctx, cvs_path)
2391           if (dumper.probe_path(dst_path)):
2392             ign, closed_tags, closed_branches = \
2393                  dumper.delete_path(dst_path, tags, branches, ctx.prune)
2394             sym_tracker.close_tags(dst_path, svn_rev, closed_tags)
2395             sym_tracker.close_branches(dst_path, svn_rev, closed_branches)
2396
2397
2398 def read_resync(fname):
2399   "Read the .resync file into memory."
2400
2401   ### note that we assume that we can hold the entire resync file in
2402   ### memory. really large repositories with whacky timestamps could
2403   ### bust this assumption. should that ever happen, then it is possible
2404   ### to split the resync file into pieces and make multiple passes,
2405   ### using each piece.
2406
2407   #
2408   # A digest maps to a sequence of lists which specify a lower and upper
2409   # time bound for matching up the commit. We keep a sequence of these
2410   # because a number of checkins with the same log message (e.g. an empty
2411   # log message) could need to be remapped. We also make them a list because
2412   # we will dynamically expand the lower/upper bound as we find commits
2413   # that fall into a particular msg and time range.
2414   #
2415   # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
2416   #
2417   resync = { }
2418
2419   for line in fileinput.FileInput(fname):
2420     t1 = int(line[:8], 16)
2421     digest = line[9:DIGEST_END_IDX]
2422     t2 = int(line[DIGEST_END_IDX+1:], 16)
2423     t1_l = t1 - COMMIT_THRESHOLD/2
2424     t1_u = t1 + COMMIT_THRESHOLD/2
2425     if resync.has_key(digest):
2426       resync[digest].append([t1_l, t1_u, t2])
2427     else:
2428       resync[digest] = [ [t1_l, t1_u, t2] ]
2429
2430   # For each digest, sort the resync items in it in increasing order,
2431   # based on the lower time bound.
2432   digests = resync.keys()
2433   for digest in digests:
2434     (resync[digest]).sort()
2435
2436   return resync
2437
2438
2439 def parse_revs_line(line):
2440   data = line.split(' ', 7)
2441   timestamp = int(data[0], 16)
2442   id = data[1]
2443   op = data[2]
2444   rev = data[3]
2445   deltatext_code = data[4]
2446   branch_name = data[5]
2447   if branch_name == "*":
2448     branch_name = None
2449   ntags = int(data[6])
2450   tags = data[7].split(' ', ntags + 1)
2451   nbranches = int(tags[ntags])
2452   branches = tags[ntags + 1].split(' ', nbranches)
2453   fname = branches[nbranches][:-1]  # strip \n
2454   tags = tags[:ntags]
2455   branches = branches[:nbranches]
2456
2457   return timestamp, id, op, rev, deltatext_code, \
2458          fname, branch_name, tags, branches
2459
2460
2461 def write_revs_line(output, timestamp, digest, op, revision,
2462                     deltatext_code, fname, branch_name, tags, branches):
2463   output.write('%08lx %s %s %s %s ' % \
2464                (timestamp, digest, op, revision, deltatext_code))
2465   if not branch_name:
2466     branch_name = "*"
2467   output.write('%s ' % branch_name)
2468   output.write('%d ' % (len(tags)))
2469   for tag in tags:
2470     output.write('%s ' % (tag))
2471   output.write('%d ' % (len(branches)))
2472   for branch in branches:
2473     output.write('%s ' % (branch))
2474   output.write('%s\n' % fname)
2475
2476
2477 def pass1(ctx):
2478   cd = CollectData(ctx.cvsroot, DATAFILE, ctx.default_branches_db)
2479   p = rcsparse.Parser()
2480   stats = [ 0 ]
2481   os.path.walk(ctx.cvsroot, visit_file, (cd, p, stats))
2482   if ctx.verbose:
2483     print 'processed', stats[0], 'files'
2484   if len(cd.fatal_errors) > 0:
2485     sys.exit("Pass 1 complete.\n" + "=" * 75 + "\n"
2486              + "Error summary:\n"
2487              + "\n".join(cd.fatal_errors)
2488              + "\nExited due to fatal error(s).")
2489
2490 def pass2(ctx):
2491   "Pass 2: clean up the revision information."
2492
2493   # We may have recorded some changes in revisions' timestamp. We need to
2494   # scan for any other files which may have had the same log message and
2495   # occurred at "the same time" and change their timestamps, too.
2496
2497   # read the resync data file
2498   resync = read_resync(ctx.log_fname_base + RESYNC_SUFFIX)
2499
2500   output = open(ctx.log_fname_base + CLEAN_REVS_SUFFIX, 'w')
2501
2502   # process the revisions file, looking for items to clean up
2503   for line in fileinput.FileInput(ctx.log_fname_base + REVS_SUFFIX):
2504     timestamp, digest, op, rev, deltatext_code, fname, \
2505                branch_name, tags, branches = parse_revs_line(line)
2506     if not resync.has_key(digest):
2507       output.write(line)
2508       continue
2509
2510     # we have a hit. see if this is "near" any of the resync records we
2511     # have recorded for this digest [of the log message].
2512     for record in resync[digest]:
2513       if record[0] <= timestamp <= record[1]:
2514         # bingo! remap the time on this (record[2] is the new time).
2515         write_revs_line(output, record[2], digest, op, rev,
2516                         deltatext_code, fname, branch_name, tags, branches)
2517
2518         print "RESYNC: '%s' (%s) : old time='%s' new time='%s'" \
2519               % (relative_name(ctx.cvsroot, fname),
2520                  rev, time.ctime(timestamp), time.ctime(record[2]))
2521
2522         # adjust the time range. we want the COMMIT_THRESHOLD from the
2523         # bounds of the earlier/latest commit in this group.
2524         record[0] = min(record[0], timestamp - COMMIT_THRESHOLD/2)
2525         record[1] = max(record[1], timestamp + COMMIT_THRESHOLD/2)
2526
2527         # stop looking for hits
2528         break
2529     else:
2530       # the file/rev did not need to have its time changed.
2531       output.write(line)
2532
2533
2534 def pass3(ctx):
2535   # sort the log files
2536
2537   # GNU sort will sort our dates differently (incorrectly!) if our
2538   # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
2539   # it to 'C'
2540   if os.environ.has_key('LC_ALL'):
2541     lc_all_tmp = os.environ['LC_ALL']
2542   else:
2543     lc_all_tmp = None
2544   os.environ['LC_ALL'] = 'C'
2545   run_command('sort %s > %s' % (ctx.log_fname_base + CLEAN_REVS_SUFFIX,
2546                                 ctx.log_fname_base + SORTED_REVS_SUFFIX))
2547   if lc_all_tmp is None:
2548     del os.environ['LC_ALL']
2549   else:
2550     os.environ['LC_ALL'] = lc_all_tmp
2551
2552
2553 def pass4(ctx):
2554   sym_tracker = SymbolicNameTracker()
2555   metadata_db = Database(METADATA_DB, 'r')
2556
2557   # A dictionary of Commit objects, keyed by digest.  Each object
2558   # represents one logical commit, which may involve multiple files.
2559   #
2560   # The reason this is a dictionary, not a single object, is that
2561   # there may be multiple commits interleaved in time.  A commit can
2562   # span up to COMMIT_THRESHOLD seconds, which leaves plenty of time
2563   # for parts of some other commit to occur.  Since the s-revs file is
2564   # sorted by timestamp first, then by digest within each timestamp,
2565   # it's quite easy to have interleaved commits.
2566   commits = { }
2567
2568   # The total number of separate commits processed.  This is used only for
2569   # printing statistics, it does not affect the results in the repository.
2570   count = 0
2571
2572   # Start the dumpfile object.
2573   dumper = Dumper(ctx)
2574
2575   # process the logfiles, creating the target
2576   for line in fileinput.FileInput(ctx.log_fname_base + SORTED_REVS_SUFFIX):
2577     timestamp, id, op, rev, deltatext_code, fname, \
2578                branch_name, tags, branches = parse_revs_line(line)
2579
2580     if ctx.trunk_only and not trunk_rev.match(rev):
2581       ### note this could/should have caused a flush, but the next item
2582       ### will take care of that for us
2583       continue
2584
2585     # Each time we read a new line, we scan the commits we've
2586     # accumulated so far to see if any are ready for processing now.
2587     process = [ ]
2588     for scan_id, scan_c in commits.items():
2589       if scan_c.t_max + COMMIT_THRESHOLD < timestamp:
2590         process.append(scan_c)
2591         del commits[scan_id]
2592         continue
2593       # If the inbound commit is on the same file as a pending commit,
2594       # close the pending commit to further changes. Don't flush it though,
2595       # as there may be other pending commits dated before this one.
2596       # ### ISSUE: the has_file() check below is not optimal.
2597       # It does fix the dataloss bug where revisions would get lost
2598       # if checked in too quickly, but it can also break apart the
2599       # commits. The correct fix would require tracking the dependencies
2600       # between change sets and committing them in proper order.
2601       if scan_c.has_file(fname):
2602         unused_id = scan_id + '-'
2603         while commits.has_key(unused_id):
2604           unused_id = unused_id + '-'
2605         commits[unused_id] = scan_c
2606         del commits[scan_id]
2607
2608     # If there are any elements in 'process' at this point, they need
2609     # to be committed, because this latest rev couldn't possibly be
2610     # part of any of them.  Sort them into time-order, then commit 'em.
2611     process.sort()
2612     for c in process:
2613       c.commit(dumper, ctx, sym_tracker)
2614     count = count + len(process)
2615
2616     # Add this item into the set of still-available commits.
2617     if commits.has_key(id):
2618       c = commits[id]
2619     else:
2620       author, log = metadata_db[id]
2621       c = commits[id] = Commit(author, log)
2622     c.add(timestamp, op, fname, rev, deltatext_code, branch_name,
2623           tags, branches)
2624
2625   # End of the sorted revs file.  Flush any remaining commits:
2626   if commits:
2627     process = commits.values()
2628     process.sort()
2629     for  c in process:
2630       c.commit(dumper, ctx, sym_tracker)
2631     count = count + len(process)
2632
2633   # Create (or complete) any branches and tags not already done.
2634   sym_tracker.finish(dumper, ctx)
2635
2636   dumper.close()
2637
2638   if ctx.verbose:
2639     print count, 'commits processed.'
2640
2641
2642 def pass5(ctx):
2643   if ctx.skip_cleanup:
2644     return
2645
2646   # Remove our database files
2647   os.unlink(SVN_REVISIONS_DB)
2648   os.unlink(NODES_DB)
2649   os.unlink(SYMBOLIC_NAME_ROOTS_DB)
2650   os.unlink(SYMBOLIC_NAMES_DB)
2651   os.unlink(METADATA_DB)
2652
2653   # This is the only DB reference still reachable at this point; lose
2654   # it before removing the file.
2655   ctx.default_branches_db = None
2656   os.unlink(DEFAULT_BRANCHES_DB)
2657
2658   # Remove our other data files
2659   for suffix in (REVS_SUFFIX, CLEAN_REVS_SUFFIX,
2660                  SORTED_REVS_SUFFIX, RESYNC_SUFFIX):
2661     os.unlink('cvs2svn-data' + suffix)
2662
2663
2664 _passes = [
2665   pass1,
2666   pass2,
2667   pass3,
2668   pass4,
2669   pass5,
2670   ]
2671
2672
2673 class _ctx:
2674   pass
2675
2676
2677 class MimeMapper:
2678   "A class that provides mappings from file names to MIME types."
2679
2680   def _init_(self):
2681     self.mappings = { }
2682     self.missing_mappings = { }
2683
2684
2685   def set_mime_types_file(self, mime_types_file):
2686     for line in fileinput.input(mime_types_file):
2687       if line.startswith("#"):
2688         continue
2689
2690       # format of a line is something like
2691       # text/plain c h cpp
2692       extensions = line.split()
2693       if len(extensions) < 2:
2694         continue
2695       type = extensions.pop(0)
2696       for ext in extensions:
2697         if self.mappings.has_key(ext) and self.mappings[ext] != type:
2698           sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n" \
2699                            % (warning_prefix, ext, self.mappings[ext], type))
2700         self.mappings[ext] = type
2701
2702
2703   def get_type_from_filename(self, filename):
2704     basename, extension = os.path.splitext(os.path.basename(filename))
2705
2706     # Extension includes the dot, so strip it (will leave extension
2707     # empty if filename ends with a dot, which is ok):
2708     extension = extension[1:]
2709
2710     # If there is no extension (or the file ends with a period), use
2711     # the base name for mapping. This allows us to set mappings for
2712     # files such as README or Makefile:
2713     if not extension:
2714       extension = basename
2715     if self.mappings.has_key(extension):
2716       return self.mappings[extension]
2717     self.missing_mappings[extension] = 1
2718     return None
2719
2720
2721   def print_missing_mappings(self):
2722     for ext in self.missing_mappings:
2723       sys.stderr.write("%s: no MIME mapping for *.%s\n" % (warning_prefix, ext))
2724
2725
2726 def convert(ctx, start_pass=1):
2727   "Convert a CVS repository to an SVN repository."
2728
2729   if not os.path.exists(ctx.cvsroot):
2730     sys.stderr.write(error_prefix + ': \'%s\' does not exist.\n' % ctx.cvsroot)
2731     sys.exit(1)
2732
2733   times = [ None ] * len(_passes)
2734   for i in range(start_pass - 1, len(_passes)):
2735     times[i] = time.time()
2736     print '----- pass %d -----' % (i + 1)
2737     _passes[i](ctx)
2738   times.append(time.time())
2739
2740   for i in range(start_pass, len(_passes)+1):
2741     print 'pass %d: %d seconds' % (i, int(times[i] - times[i-1]))
2742   print ' total:', int(times[len(_passes)] - times[start_pass-1]), 'seconds'
2743
2744
2745 def usage(ctx):
2746   print 'USAGE: %s [-n] [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
2747         % os.path.basename(sys.argv[0])
2748   print '  --help, -h           print this usage message and exit with success'
2749   print '  -n                   dry run; parse CVS repos, but do not construct SVN repos'
2750   print '  -v                   verbose'
2751   print '  -s PATH              path for SVN repos'
2752   print '  -p NUM               start at pass NUM of %d' % len(_passes)
2753   print '  --existing-svnrepos  load into existing SVN repository'
2754   print '  --dumpfile=PATH      name of intermediate svn dumpfile'
2755   print '  --svnadmin=PATH      path to the svnadmin program'
2756   print '  --trunk-only         convert only trunk commits, not tags nor branches'
2757   print '  --trunk=PATH         path for trunk (default: %s)'    \
2758         % ctx.trunk_base
2759   print '  --branches=PATH      path for branches (default: %s)' \
2760         % ctx.branches_base
2761   print '  --tags=PATH          path for tags (default: %s)'     \
2762         % ctx.tags_base
2763   print '  --no-prune           don\'t prune empty directories'
2764   print '  --dump-only          just produce a dumpfile, don\'t commit to a repos'
2765   print '  --encoding=ENC       encoding of log messages in CVS repos (default: %s)' \
2766         % ctx.encoding
2767   print '  --username=NAME      username for cvs2svn-synthesized commits'
2768   print '                                                  (default: %s)' \
2769         % ctx.username
2770   print '  --skip-cleanup       prevent the deletion of intermediate files'
2771   print '  --bdb-txn-nosync     pass --bdb-txn-nosync to "svnadmin create"'
2772   print '  --cvs-revnums        record CVS revision numbers as file properties'
2773   print '  --mime-types=FILE    specify an apache-style mime.types file for\n' \
2774         '                       setting svn:mime-type'
2775   print '  --set-eol-style      automatically set svn:eol-style=native for\n' \
2776         '                       text files (needs --mime-types)'
2777
2778
2779 def main():
2780   # prepare the operation context
2781   ctx = _ctx()
2782   ctx.cvsroot = None
2783   ctx.target = None
2784   ctx.log_fname_base = DATAFILE
2785   ctx.dumpfile = DUMPFILE
2786   ctx.verbose = 0
2787   ctx.dry_run = 0
2788   ctx.prune = 1
2789   ctx.existing_svnrepos = 0
2790   ctx.dump_only = 0
2791   ctx.trunk_only = 0
2792   ctx.trunk_base = "trunk"
2793   ctx.tags_base = "tags"
2794   ctx.branches_base = "branches"
2795   ctx.encoding = "ascii"
2796   ctx.mime_types_file = None
2797   ctx.mime_mapper = None
2798   ctx.set_eol_style = 0
2799   ctx.svnadmin = "svnadmin"
2800   ctx.username = "unknown"
2801   ctx.print_help = 0
2802   ctx.skip_cleanup = 0
2803   ctx.cvs_revnums = 0
2804   ctx.bdb_txn_nosync = 0
2805
2806   start_pass = 1
2807
2808   try:
2809     opts, args = getopt.getopt(sys.argv[1:], 'p:s:vnh',
2810                                [ "help", "create", "trunk=",
2811                                  "username=", "existing-svnrepos",
2812                                  "branches=", "tags=", "encoding=",
2813                                  "mime-types=", "set-eol-style",
2814                                  "trunk-only", "no-prune",
2815                                  "dump-only", "dumpfile=", "svnadmin=",
2816                                  "skip-cleanup", "cvs-revnums",
2817                                  "bdb-txn-nosync"])
2818   except getopt.GetoptError, e:
2819     sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
2820     usage(ctx)
2821     sys.exit(1)
2822
2823   for opt, value in opts:
2824     if opt == '-p':
2825       start_pass = int(value)
2826       if start_pass < 1 or start_pass > len(_passes):
2827         print '%s: illegal value (%d) for starting pass. ' \
2828               'must be 1 through %d.' % (error_prefix, start_pass,
2829                                          len(_passes))
2830         sys.exit(1)
2831     elif (opt == '--help') or (opt == '-h'):
2832       ctx.print_help = 1
2833     elif opt == '-v':
2834       ctx.verbose = 1
2835     elif opt == '-n':
2836       ctx.dry_run = 1
2837     elif opt == '-s':
2838       ctx.target = value
2839     elif opt == '--existing-svnrepos':
2840       ctx.existing_svnrepos = 1
2841     elif opt == '--dumpfile':
2842       ctx.dumpfile = value
2843     elif opt == '--svnadmin':
2844       ctx.svnadmin = value
2845     elif opt == '--trunk-only':
2846       ctx.trunk_only = 1
2847     elif opt == '--trunk':
2848       ctx.trunk_base = value
2849     elif opt == '--branches':
2850       ctx.branches_base = value
2851     elif opt == '--tags':
2852       ctx.tags_base = value
2853     elif opt == '--no-prune':
2854       ctx.prune = None
2855     elif opt == '--dump-only':
2856       ctx.dump_only = 1
2857     elif opt == '--encoding':
2858       ctx.encoding = value
2859     elif opt == '--mime-types':
2860       ctx.mime_types_file = value
2861     elif opt == '--set-eol-style':
2862       ctx.set_eol_style = 1
2863     elif opt == '--username':
2864       ctx.username = value
2865     elif opt == '--skip-cleanup':
2866       ctx.skip_cleanup = 1
2867     elif opt == '--cvs-revnums':
2868       ctx.cvs_revnums = 1
2869     elif opt == '--bdb-txn-nosync':
2870       ctx.bdb_txn_nosync = 1
2871     elif opt == '--create':
2872       sys.stderr.write(warning_prefix +
2873           ': The behaviour produced by the --create option is now the '
2874           'default,\nand passing the option is deprecated.\n')
2875
2876   if ctx.print_help:
2877     usage(ctx)
2878     sys.exit(0)
2879
2880   # Consistency check for options and arguments.
2881   if len(args) == 0:
2882     usage(ctx)
2883     sys.exit(1)
2884
2885   if len(args) > 1:
2886     sys.stderr.write(error_prefix +
2887                      ": must pass only one CVS repository.\n")
2888     usage(ctx)
2889     sys.exit(1)
2890
2891   ctx.cvsroot = args[0]
2892
2893   if not os.path.isdir(ctx.cvsroot):
2894     sys.stderr.write(error_prefix +
2895                      ": the cvs-repos-path '%s' is not an "
2896                      "existing directory.\n" % ctx.cvsroot)
2897     sys.exit(1)
2898
2899   if (not ctx.target) and (not ctx.dump_only):
2900     sys.stderr.write(error_prefix +
2901                      ": must pass one of '-s' or '--dump-only'.\n")
2902     sys.exit(1)
2903
2904   def not_both(opt1val, opt1name, opt2val, opt2name):
2905     if opt1val and opt2val:
2906       sys.stderr.write(error_prefix + ": cannot pass both '%s' and '%s'.\n" \
2907           % (opt1name, opt2name))
2908
2909   not_both(ctx.target, '-s', ctx.dump_only, '--dump-only')
2910
2911   not_both(ctx.dump_only, '--dump-only',
2912     ctx.existing_svnrepos, '--existing-svnrepos')
2913
2914   not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
2915     ctx.existing_svnrepos, '--existing-svnrepos')
2916
2917   not_both(ctx.dump_only, '--dump-only',
2918     ctx.bdb_txn_nosync, '--bdb-txn-nosync')
2919
2920   if ((string.find(ctx.trunk_base, '/') > -1)
2921       or (string.find(ctx.tags_base, '/') > -1)
2922       or (string.find(ctx.branches_base, '/') > -1)):
2923     sys.stderr.write("%s: cannot pass multicomponent path to "
2924                      "--trunk, --tags, or --branches yet.\n"
2925                      "  See http://subversion.tigris.org/issues/show_bug.cgi?"
2926                      "id=1409 "
2927                      "for details.\n" % error_prefix)
2928     sys.exit(1)
2929
2930   if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
2931     sys.stderr.write(error_prefix +
2932                      ": the svn-repos-path '%s' is not an "
2933                      "existing directory.\n" % ctx.target)
2934     sys.exit(1)
2935
2936   if not ctx.dump_only and not ctx.existing_svnrepos \
2937       and os.path.exists(ctx.target):
2938     sys.stderr.write(error_prefix +
2939                      ": the svn-repos-path '%s' exists.\nRemove it, or pass "
2940                      "'--existing-svnrepos'.\n" % ctx.target)
2941     sys.exit(1)
2942
2943   if ctx.set_eol_style and not ctx.mime_types_file:
2944     sys.stderr.write(error_prefix +
2945                      ": can only pass '--set-eol-style' if you also pass"
2946                      " '--mime-types'.\n")
2947     sys.exit(1)
2948
2949   if ctx.mime_types_file:
2950     ctx.mime_mapper = MimeMapper()
2951     ctx.mime_mapper.set_mime_types_file(ctx.mime_types_file)
2952
2953   # Lock the current directory for temporary files.
2954   try:
2955     os.mkdir('cvs2svn.lock')
2956   except OSError:
2957     sys.stderr.write(error_prefix +
2958         ": cvs2svn writes temporary files to the current working directory.\n"
2959         "  The directory 'cvs2svn.lock' exists, indicating that another\n"
2960         "  cvs2svn process is currently using the current directory for its\n"
2961         "  temporary workspace. If you are certain that is not the case,\n"
2962         "  remove the 'cvs2svn.lock' directory.\n")
2963     sys.exit(1)
2964   try:
2965     ctx.default_branches_db = Database(DEFAULT_BRANCHES_DB, 'n')
2966     convert(ctx, start_pass=start_pass)
2967   finally:
2968     try: os.rmdir('cvs2svn.lock')
2969     except: pass
2970
2971   if ctx.mime_types_file:
2972     ctx.mime_mapper.print_missing_mappings()
2973
2974 if __name__ == '__main__':
2975   main()