1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import string
30 import getopt
31 import stat
32 import md5
33 import marshal
34 import errno
35 import popen2
37 # Warnings and errors start with these strings. They are typically
38 # followed by a colon and a space, as in "%s: " ==> "WARNING: ".
39 warning_prefix = "WARNING"
40 error_prefix = "ERROR"
42 # Make sure this Python is recent enough.
43 if sys.hexversion < 0x2000000:
44 sys.stderr.write("%s: Python 2.0 or higher required, "
45 "see www.python.org.\n" % error_prefix)
46 sys.exit(1)
48 # Pretend we have true booleans on older python versions
49 try:
50 True
51 except:
52 True = 1
53 False = 0
55 # Minimal, incomplete, version of popen2.Popen3 for those platforms
56 # for which popen2 does not provide it.
57 try:
58 Popen3 = popen2.Popen3
59 except AttributeError:
60 class Popen3:
61 def __init__(self, cmd, capturestderr):
62 if type(cmd) != str:
63 cmd = " ".join(cmd)
64 self.fromchild, self.tochild, self.childerr = popen2.popen3(cmd,
65 mode='b')
66 def wait(self):
67 return self.fromchild.close() or self.tochild.close() or \
68 self.childerr.close()
70 # DBM module selection
72 # 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
73 # so that the dbhash module used by anydbm will use bsddb3.
74 try:
75 import bsddb3
76 sys.modules['bsddb'] = sys.modules['bsddb3']
77 except ImportError:
78 pass
80 # 2. These DBM modules are not good for cvs2svn.
81 import anydbm
82 if (anydbm._defaultmod.__name__ == 'dumbdbm'
83 or anydbm._defaultmod.__name__ == 'dbm'):
84 print 'ERROR: your installation of Python does not contain a suitable'
85 print ' DBM module. This script cannot continue.'
86 print ' to solve: see http://python.org/doc/current/lib/module-anydbm.html'
87 print ' for details.'
88 sys.exit(1)
90 # 3. If we are using the old bsddb185 module, then prefer gdbm instead.
91 # Unfortunately, gdbm appears not to be trouble free, either.
92 if hasattr(anydbm._defaultmod, 'bsddb') \
93 and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
94 try:
95 gdbm = __import__('gdbm')
96 except ImportError:
97 sys.stderr.write(warning_prefix +
98 ': The version of the bsddb module found '
99 'on your computer has been reported to malfunction on some datasets, '
100 'causing KeyError exceptions. You may wish to upgrade your Python to '
101 'version 2.3 or later.\n')
102 else:
103 anydbm._defaultmod = gdbm
105 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
106 branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
107 vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$')
109 # This really only matches standard '1.1.1.*'-style vendor revisions.
110 # One could conceivably have a file whose default branch is 1.1.3 or
111 # whatever, or was that at some point in time, with vendor revisions
112 # 1.1.3.1, 1.1.3.2, etc. But with the default branch gone now (which
113 # is the only time this regexp gets used), we'd have no basis for
114 # assuming that the non-standard vendor branch had ever been the
115 # default branch anyway, so we don't want this to match them.
116 vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9]+)$')
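# For example, vendor_revision matches '1.1.1.1' and '1.1.1.96', but not a
# non-standard vendor revision such as '1.1.3.2', and not the branch
# number '1.1.1' itself.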
118 # If this run's output is a repository, then (in the tmpdir) we use
119 # a dumpfile of this name for repository loads.
121 # If this run's output is a dumpfile, then this is default name of
122 # that dumpfile, but in the current directory (unless the user has
123 # specified a dumpfile path, of course, in which case it will be
124 # wherever the user said).
125 DUMPFILE = 'cvs2svn-dump'
127 # This file appears with different suffixes at different stages of
128 # processing. CVS revisions are cleaned and sorted here, for commit
129 # grouping. See design-notes.txt for details.
130 DATAFILE = 'cvs2svn-data'
132 # This file contains a marshalled copy of all the statistics that we
133 # gather throughout the various runs of cvs2svn. The data is stored as a
134 # marshalled dictionary.
135 STATISTICS_FILE = 'cvs2svn-statistics'
137 # This text file contains records (1 per line) that describe svn
138 # filesystem paths that are the opening and closing source revisions
139 # for copies to tags and branches. The format is as follows:
141 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
143 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
144 # SVN_REVNUM are the primary and secondary sorting criteria for
145 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
146 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
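# A hypothetical record in the format described above, assuming TYPE is
# written using the OPENING/CLOSING constants defined below:
#
#   RELEASE_1_0 42 O trunk/proj/foo.c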
147 # A sorted version of the above file.
148 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
150 # This file is a temporary file for storing symbolic_name -> closing
151 # CVSRevision until the end of our pass where we can look up the
152 # corresponding SVNRevNum for the closing revs and write these out to
153 # the SYMBOL_OPENINGS_CLOSINGS.
154 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
156 # Skeleton version of an svn filesystem.
157 # (These supersede and will eventually replace the two above.)
158 # See class SVNRepositoryMirror for how these work.
159 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
160 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
162 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
163 # SYMBOL_OPENINGS_CLOSINGS_SORTED
164 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
166 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
167 # the CVSRevision is the last such that is a source for those symbolic
168 # names. For example, if branch B's number is 1.3.0.2 in this CVS
169 # file, and this file's 1.3 is the latest (by date) revision among
170 # *all* CVS files that is a source for branch B, then the
171 # CVSRevision.unique_key() corresponding to this file at 1.3 would
172 # list at least B in its list.
173 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
175 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
176 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
177 ### the s-revs data in this database.
178 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
180 # Lists all symbolic names that are tags. Keys are strings (symbolic
181 # names), values are ignorable.
182 TAGS_DB = 'cvs2svn-tags.db'
184 # A list of all tags. Each line consists of the tag name and the number
185 # of files in which it exists, separated by a space.
186 TAGS_LIST = 'cvs2svn-tags.txt'
188 # A list of all branches. The file is stored as a plain text file
189 # to make it easy to look at in an editor. Each line contains the
190 # branch name, the number of files where the branch is created, the
191 # commit count, and a list of tags and branches that are defined on
192 # revisions in the branch.
193 BRANCHES_LIST = 'cvs2svn-branches.txt'
195 # These two databases provide a bidirectional mapping between
196 # CVSRevision.unique_key()s and Subversion revision numbers.
198 # The first maps CVSRevision.unique_key() to a number; the values are
199 # not unique.
201 # The second maps a number to a list of CVSRevision.unique_key()s.
202 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
203 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
205 # This database maps svn_revnums to tuples of (symbolic_name, date).
207 # The svn_revnums are the revision numbers of all non-primary
208 # SVNCommits. No primary SVNCommit has a key in this database.
210 # The date is stored for all commits in this database.
212 # For commits that fill symbolic names, the symbolic_name is stored.
213 # For commits that are default branch syncs, the symbolic_name is None.
214 SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'
216 # This database maps svn_revnums of a default branch synchronization
217 # commit to the svn_revnum of the primary SVNCommit that motivated it.
219 # (NOTE: Secondary commits that fill branches and tags also have a
220 # motivating commit, but we do not record it because it is (currently)
221 # not needed for anything.)
223 # This mapping is used when generating the log message for the commit
224 # that synchronizes the default branch with trunk.
225 MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'
227 # How many bytes to read at a time from a pipe. 128 kiB should be
228 # large enough to be efficient without wasting too much memory.
229 PIPE_READ_SIZE = 128 * 1024
231 # Record the default RCS branches, if any, for CVS filepaths.
233 # The keys are CVS filepaths, relative to the top of the repository
234 # and with the ",v" stripped off, so they match the cvs paths used in
235 # Commit.commit(). The values are vendor branch revisions, such as
236 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
237 # represents the highest vendor branch revision thought to have ever
238 # been head of the default branch.
240 # The reason we record a specific vendor revision, rather than a
241 # default branch number, is that there are two cases to handle:
243 # One case is simple. The RCS file lists a default branch explicitly
244 # in its header, such as '1.1.1'. In this case, we know that every
245 # revision on the vendor branch is to be treated as head of trunk at
246 # that point in time.
248 # But there's also a degenerate case. The RCS file does not currently
249 # have a default branch, yet we can deduce that for some period in the
250 # past it probably *did* have one. For example, the file has vendor
251 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
252 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
253 # case, we should record 1.1.1.96 as the last vendor revision to have
254 # been the head of the default branch.
255 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
257 # Records the author and log message for each changeset.
258 # The keys are author+log digests, the same kind used to identify
259 # unique revisions in the .revs, etc files. Each value is a tuple
260 # of two elements: '(author logmessage)'.
261 METADATA_DB = "cvs2svn-metadata.db"
263 REVS_SUFFIX = '.revs'
264 CLEAN_REVS_SUFFIX = '.c-revs'
265 SORTED_REVS_SUFFIX = '.s-revs'
266 RESYNC_SUFFIX = '.resync'
268 SVN_INVALID_REVNUM = -1
270 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
272 # Things that can happen to a file.
273 OP_NOOP = '-'
274 OP_ADD = 'A'
275 OP_DELETE = 'D'
276 OP_CHANGE = 'C'
278 # A deltatext either does or doesn't represent some change.
279 DELTATEXT_NONEMPTY = 'N'
280 DELTATEXT_EMPTY = 'E'
282 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
284 # Constants used in SYMBOL_OPENINGS_CLOSINGS
285 OPENING = 'O'
286 CLOSING = 'C'
288 def temp(basename):
289 """Return a path to BASENAME in Ctx().tmpdir.
290 This is a convenience function to save horizontal space in source."""
291 return os.path.join(Ctx().tmpdir, basename)
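# For example, assuming Ctx().tmpdir is the directory 'cvs2svn-tmp',
# temp(DATAFILE) would return 'cvs2svn-tmp/cvs2svn-data'.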
293 # Since the unofficial set also includes [/\] we need to translate those
294 # into ones that don't conflict with Subversion limitations.
295 def _clean_symbolic_name(name):
296 """Return symbolic name NAME, translating characters that Subversion
297 does not allow in a pathname."""
298 name = name.replace('/','++')
299 name = name.replace('\\','--')
300 return name
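# For example, _clean_symbolic_name('RELEASE/1.0') returns 'RELEASE++1.0';
# backslashes are likewise rewritten as '--'.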
302 def _path_join(*components):
303 """Join two or more pathname COMPONENTS, inserting '/' as needed.
304 Empty components are skipped."""
305 return string.join(filter(None, components), '/')
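# For example, _path_join('trunk', '', 'proj/foo.c') returns
# 'trunk/proj/foo.c'; the empty component is dropped instead of producing
# a doubled '/'.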
307 def run_command(command):
308 if os.system(command):
309 sys.exit('Command failed: "%s"' % command)
311 def relative_name(cvsroot, fname):
312 l = len(cvsroot)
313 if fname[:l] == cvsroot:
314 if fname[l] == os.sep:
315 return string.replace(fname[l+1:], os.sep, '/')
316 return string.replace(fname[l:], os.sep, '/')
317 sys.stderr.write("%s: relative_name('%s', '%s'): fname is not a sub-path of"
318 " cvsroot\n" % (error_prefix, cvsroot, fname))
319 sys.exit(1)
321 def get_co_pipe(c_rev, extra_arguments=''):
322 """Return a command string, and the pipe created using that string.
323 C_REV is a CVSRevision, and EXTRA_ARGUMENTS is used to add extra
324 arguments. The pipe returns the text of that CVS Revision."""
325 ctx = Ctx()
326 if ctx.use_cvs:
327 pipe_cmd = 'cvs %s co -r%s -p %s %s' % \
328 (ctx.cvs_global_arguments, c_rev.rev, extra_arguments,
329 escape_shell_arg(ctx.cvs_module + c_rev.cvs_path))
330 else:
331 pipe_cmd = 'co -q -x,v -p%s %s %s' % \
332 (c_rev.rev, extra_arguments, escape_shell_arg(c_rev.rcs_path()))
333 pipe = Popen3(pipe_cmd, True)
334 pipe.tochild.close()
335 return pipe_cmd, pipe
337 def generate_ignores(c_rev):
338 # Read in props
339 pipe_cmd, pipe = get_co_pipe(c_rev)
340 buf = pipe.fromchild.read(PIPE_READ_SIZE)
341 raw_ignore_val = ""
342 while buf:
343 raw_ignore_val = raw_ignore_val + buf
344 buf = pipe.fromchild.read(PIPE_READ_SIZE)
345 pipe.fromchild.close()
346 error_output = pipe.childerr.read()
347 exit_status = pipe.wait()
348 if exit_status:
349 sys.exit("%s: The command '%s' failed with exit status: %s\n"
350 "and the following output:\n"
351 "%s" % (error_prefix, pipe_cmd, exit_status, error_output))
353 # Tweak props: First, convert any spaces to newlines...
354 raw_ignore_val = '\n'.join(raw_ignore_val.split())
355 raw_ignores = raw_ignore_val.split('\n')
356 ignore_vals = [ ]
357 for ignore in raw_ignores:
358 # Reset the list if we encounter a '!'
359 # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
360 if ignore == '!':
361 ignore_vals = [ ]
362 continue
363 # Skip empty lines
364 if len(ignore) == 0:
365 continue
366 ignore_vals.append(ignore)
367 return ignore_vals
369 # Return a string that has not been returned by gen_key() before.
370 gen_key_base = 0L
371 def gen_key():
372 global gen_key_base
373 key = '%x' % gen_key_base
374 gen_key_base = gen_key_base + 1
375 return key
377 # ============================================================================
378 # This code is copied with a few modifications from:
379 # subversion/subversion/bindings/swig/python/svn/core.py
381 if sys.platform == "win32":
382 _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')
384 def escape_shell_arg(arg):
385 # The (very strange) parsing rules used by the C runtime library are
386 # described at:
387 # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp
389 # double up slashes, but only if they are followed by a quote character
390 arg = re.sub(_escape_shell_arg_re, r'\1\1\2', arg)
392 # surround by quotes and escape quotes inside
393 arg = '"' + string.replace(arg, '"', '"^""') + '"'
394 return arg
397 def argv_to_command_string(argv):
398 """Flatten a list of command line arguments into a command string.
400 The resulting command string is expected to be passed to the system
401 shell which os functions like popen() and system() invoke internally.
402 """
404 # According to cmd's usage notes (cmd /?), it parses the command line by
405 # "seeing if the first character is a quote character and if so, stripping
406 # the leading character and removing the last quote character."
407 # So to prevent the argument string from being changed we add an extra set
408 # of quotes around it here.
409 return '"' + string.join(map(escape_shell_arg, argv), " ") + '"'
411 else:
412 def escape_shell_arg(str):
413 return "'" + string.replace(str, "'", "'\\''") + "'"
415 def argv_to_command_string(argv):
416 """Flatten a list of command line arguments into a command string.
418 The resulting command string is expected to be passed to the system
419 shell which os functions like popen() and system() invoke internally.
420 """
422 return string.join(map(escape_shell_arg, argv), " ")
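# For example, on POSIX systems escape_shell_arg("it's") returns the
# nine-character string 'it'\''s' (surrounding single quotes included),
# which the shell parses back to the original argument.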
423 # ============================================================================
425 def format_date(date):
426 """Return an svn-compatible date string for DATE (seconds since epoch)."""
427 # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
428 return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", time.gmtime(date))
430 def sort_file(infile, outfile):
431 # sort the log files
433 # GNU sort will sort our dates differently (incorrectly!) if our
434 # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
435 # it to 'C'
436 if os.environ.has_key('LC_ALL'):
437 lc_all_tmp = os.environ['LC_ALL']
438 else:
439 lc_all_tmp = None
440 os.environ['LC_ALL'] = 'C'
441 # The -T option to sort has a nice side effect. The Win32 sort is
442 # case insensitive and cannot be used, and since it does not
443 # understand the -T option and dies if we try to use it, there is
444 # no risk that we use that sort by accident.
445 run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
446 if lc_all_tmp is None:
447 del os.environ['LC_ALL']
448 else:
449 os.environ['LC_ALL'] = lc_all_tmp
451 def print_node_tree(tree, root_node, indent_depth=0):
452 """For debugging purposes. Prints all nodes in TREE that are
453 rooted at ROOT_NODE. INDENT_DEPTH is merely for purposes of
454 debugging with the print statement in this function."""
455 if not indent_depth:
456 print "TREE", "=" * 75
457 print "TREE:", " " * (indent_depth * 2), root_node, tree[root_node]
458 for key, value in tree[root_node].items():
459 if key[0] == '/': #Skip flags
460 continue
461 print_node_tree(tree, value, (indent_depth + 1))
463 def match_regexp_list(regexp_list, string):
464 """Test whether STRING matches any of the compiled regexps in REGEXP_LIST."""
465 for regexp in regexp_list:
466 if regexp.match(string):
467 return True
468 return False
470 class LF_EOL_Filter:
471 """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
472 into LFs only."""
473 def __init__(self, stream):
474 self.stream = stream
475 self.carry_cr = False
476 self.eof = False
478 def read(self, size):
479 while True:
480 buf = self.stream.read(size)
481 self.eof = len(buf) == 0
482 if self.carry_cr:
483 buf = '\r' + buf
484 self.carry_cr = False
485 if not self.eof and buf[-1] == '\r':
486 self.carry_cr = True
487 buf = buf[:-1]
488 buf = string.replace(buf, '\r\n', '\n')
489 buf = string.replace(buf, '\r', '\n')
490 if len(buf) > 0 or self.eof:
491 return buf
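# For example, filtering a stream containing 'a\r\nb\rc\n' yields
# 'a\nb\nc\n'. The carry_cr flag covers the case where a read ends between
# the '\r' and '\n' of a CRLF pair, so the pair is still collapsed to a
# single '\n' on the following read.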
494 # These constants represent the log levels that this script supports
495 LOG_WARN = -1
496 LOG_QUIET = 0
497 LOG_NORMAL = 1
498 LOG_VERBOSE = 2
499 class Log:
500 """A Simple logging facility. Each line will be timestamped is
501 self.use_timestamps is TRUE. This class is a Borg, see
502 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
503 __shared_state = {}
504 def __init__(self):
505 self.__dict__ = self.__shared_state
506 if self.__dict__:
507 return
508 self.log_level = LOG_NORMAL
509 # Set this to true if you want to see timestamps on each line output.
510 self.use_timestamps = None
511 self.logger = sys.stdout
513 def _timestamp(self):
514 """Output a detailed timestamp at the beginning of each line output."""
515 self.logger.write(time.strftime('[%Y-%m-%d %H:%M:%S %Z] - '))
517 def write(self, log_level, *args):
518 """This is the public method to use for writing to a file. Only
519 messages whose LOG_LEVEL is <= self.log_level will be printed. If
520 there are multiple ARGS, they will be separated by a space."""
521 if log_level > self.log_level:
522 return
523 if self.use_timestamps:
524 self._timestamp()
525 self.logger.write(' '.join(map(str,args)) + "\n")
526 # Ensure that log output doesn't get out-of-order with respect to
527 # stderr output.
528 self.logger.flush()
531 class Cleanup:
532 """This singleton class manages any files created by cvs2svn. When
533 you first create a file, call Cleanup.register, passing the
534 filename, and the last pass that you need the file. After the end
535 of that pass, your file will be cleaned up after running an optional
536 callback. This class is a Borg, see
537 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
539 __shared_state = {}
540 def __init__(self):
541 self.__dict__ = self.__shared_state
542 if self.__dict__:
543 return
544 self._log = {}
545 self._callbacks = {}
547 def register(self, file, which_pass, callback=None):
548 """Register FILE for cleanup at the end of WHICH_PASS, running
549 function CALLBACK prior to removal. Registering a given FILE is
550 idempotent; you may register as many times as you wish, but it
551 will only be cleaned up once.
553 Note that if a file is registered multiple times, only the first
554 callback registered for that file will be called at cleanup
555 time. Also note that if you register a database file you must
556 close the database before cleanup, e.g. using a callback."""
557 if not self._log.has_key(which_pass):
558 self._log[which_pass] = {}
559 self._log[which_pass][file] = 1
560 if callback and not self._callbacks.has_key(file):
561 self._callbacks[file] = callback
563 def cleanup(self, which_pass):
564 """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
565 if not self._log.has_key(which_pass):
566 return
567 for file in self._log[which_pass].keys():
568 Log().write(LOG_VERBOSE, "Deleting", file)
569 if self._callbacks.has_key(file):
570 self._callbacks[file]()
571 os.unlink(file)
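# Illustrative usage (the 'revs_db' object is hypothetical; pass2 and
# pass8 are pass functions defined later in this script):
#
#   Cleanup().register(temp(TAGS_LIST), pass2)
#   Cleanup().register(temp(CVS_REVS_DB), pass8, revs_db.db.close)
#
# i.e. the optional callback can close a database before its file is deleted.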
574 # Always use these constants for opening databases.
575 DB_OPEN_READ = 'r'
576 DB_OPEN_NEW = 'n'
578 # A wrapper for anydbm that uses the marshal module to store items as
579 # strings.
580 class Database:
581 def __init__(self, filename, mode):
582 # pybsddb3 has a bug which prevents it from working with
583 # Berkeley DB 4.2 if you open the db with 'n' ("new"). This
584 # causes the DB_TRUNCATE flag to be passed, which is disallowed
585 # for databases protected by lock and transaction support
586 # (bsddb databases use locking from bsddb version 4.2.4 onwards).
588 # Therefore, manually perform the removal (we can do this, because
589 # we know that for bsddb - but *not* anydbm in general - the database
590 # consists of one file with the name we specify, rather than several
591 # based on that name).
592 if mode == 'n' and anydbm._defaultmod.__name__ == 'dbhash':
593 if os.path.isfile(filename):
594 os.unlink(filename)
595 mode = 'c'
597 self.db = anydbm.open(filename, mode)
599 def has_key(self, key):
600 return self.db.has_key(key)
602 def __getitem__(self, key):
603 return marshal.loads(self.db[key])
605 def __setitem__(self, key, value):
606 self.db[key] = marshal.dumps(value)
608 def __delitem__(self, key):
609 del self.db[key]
611 def get(self, key, default):
612 if self.has_key(key):
613 return self.__getitem__(key)
614 return default
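# Because values pass through marshal, any marshallable Python value can
# be stored. Illustrative usage (the filename and key are hypothetical):
#
#   db = Database(temp('example.db'), DB_OPEN_NEW)
#   db['1.3/proj/foo.c,v'] = ('author', 'log message')
#   author, log = db['1.3/proj/foo.c,v']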
617 class StatsKeeper:
618 __shared_state = { }
619 def __init__(self):
620 self.__dict__ = self.__shared_state
621 if self.__dict__:
622 return
623 self.filename = temp(STATISTICS_FILE)
624 Cleanup().register(self.filename, pass8)
625 # This can get kinda large, so we don't store it in our data dict.
626 self.repos_files = { }
628 if os.path.exists(self.filename):
629 self.unarchive()
630 else:
631 self.data = { 'cvs_revs_count' : 0,
632 'tags': { },
633 'branches' : { },
634 'repos_size' : 0,
635 'repos_file_count' : 0,
636 'svn_rev_count' : None,
637 'first_rev_date' : 1L<<32,
638 'last_rev_date' : 0,
639 'pass_timings' : { },
640 'start_time' : 0,
641 'end_time' : 0,
642 }
644 def log_duration_for_pass(self, duration, pass_num):
645 self.data['pass_timings'][pass_num] = duration
647 def set_start_time(self, start):
648 self.data['start_time'] = start
650 def set_end_time(self, end):
651 self.data['end_time'] = end
653 def _bump_item(self, key, amount=1):
654 self.data[key] = self.data[key] + amount
656 def reset_c_rev_info(self):
657 self.data['cvs_revs_count'] = 0
658 self.data['tags'] = { }
659 self.data['branches'] = { }
661 def record_c_rev(self, c_rev):
662 self._bump_item('cvs_revs_count')
664 for tag in c_rev.tags:
665 self.data['tags'][tag] = None
666 for branch in c_rev.branches:
667 self.data['branches'][branch] = None
669 if c_rev.timestamp < self.data['first_rev_date']:
670 self.data['first_rev_date'] = c_rev.timestamp
672 if c_rev.timestamp > self.data['last_rev_date']:
673 self.data['last_rev_date'] = c_rev.timestamp
675 # Only add the size if this is the first time we see the file.
676 if not self.repos_files.has_key(c_rev.fname):
677 self._bump_item('repos_size', c_rev.file_size)
678 self.repos_files[c_rev.fname] = None
680 self.data['repos_file_count'] = len(self.repos_files)
682 def set_svn_rev_count(self, count):
683 self.data['svn_rev_count'] = count
685 def svn_rev_count(self):
686 return self.data['svn_rev_count']
688 def archive(self):
689 open(self.filename, 'w').write(marshal.dumps(self.data))
691 def unarchive(self):
692 self.data = marshal.loads(open(self.filename, 'r').read())
694 def __str__(self):
695 svn_revs_str = ""
696 if self.data['svn_rev_count'] is not None:
697 svn_revs_str = ('Total SVN Commits: %10s\n'
698 % self.data['svn_rev_count'])
700 return ('\n' \
701 'cvs2svn Statistics:\n' \
702 '------------------\n' \
703 'Total CVS Files: %10i\n' \
704 'Total CVS Revisions: %10i\n' \
705 'Total Unique Tags: %10i\n' \
706 'Total Unique Branches: %10i\n' \
707 'CVS Repos Size in KB: %10i\n' \
708 '%s' \
709 'First Revision Date: %s\n' \
710 'Last Revision Date: %s\n' \
711 '------------------' \
712 % (self.data['repos_file_count'],
713 self.data['cvs_revs_count'],
714 len(self.data['tags']),
715 len(self.data['branches']),
716 (self.data['repos_size'] / 1024),
717 svn_revs_str,
718 time.ctime(self.data['first_rev_date']),
719 time.ctime(self.data['last_rev_date']),
720 ))
722 def timings(self):
723 passes = self.data['pass_timings'].keys()
724 passes.sort()
725 str = 'Timings:\n------------------\n'
727 def desc(val):
728 if val == 1: return "second"
729 return "seconds"
731 for pass_num in passes:
732 duration = int(self.data['pass_timings'][pass_num])
733 p_str = ('pass %d:%6d %s\n'
734 % (pass_num, duration, desc(duration)))
735 str = str + p_str
737 total = int(self.data['end_time'] - self.data['start_time'])
738 str = str + ('total: %6d %s' % (total, desc(total)))
739 return str
742 class LastSymbolicNameDatabase:
743 """ Passing every CVSRevision in s-revs to this class will result in
744 a Database whose key is the last CVS Revision a symbolicname was
745 seen in, and whose value is a list of all symbolicnames that were
746 last seen in that revision."""
747 def __init__(self, mode):
748 self.symbols = {}
749 self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
750 Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)
752 # Once we've gone through all the revs,
753 # symbols.keys() will be a list of all tags and branches, and
754 # their corresponding values will be a key into the last CVS revision
755 # that they were used in.
756 def log_revision(self, c_rev):
757 # Gather last CVS Revision for symbolic name info and tag info
758 for tag in c_rev.tags:
759 self.symbols[tag] = c_rev.unique_key()
760 if c_rev.op is not OP_DELETE:
761 for branch in c_rev.branches:
762 self.symbols[branch] = c_rev.unique_key()
764 # Creates an inversion of symbols above--a dictionary of lists (key
765 # = CVS rev unique_key: val = list of symbols that close in that
766 # rev).
767 def create_database(self):
768 for sym, rev_unique_key in self.symbols.items():
769 if self.symbol_revs_db.has_key(rev_unique_key):
770 ary = self.symbol_revs_db[rev_unique_key]
771 ary.append(sym)
772 self.symbol_revs_db[rev_unique_key] = ary
773 else:
774 self.symbol_revs_db[rev_unique_key] = [sym]
777 class CVSRevisionDatabase:
778 """A Database to store CVSRevision objects and retrieve them by their
779 unique_key()."""
781 def __init__(self, mode):
782 """Initialize an instance, opening database in MODE (like the MODE
783 argument to Database or anydbm.open())."""
784 self.cvs_revs_db = Database(temp(CVS_REVS_DB), mode)
785 Cleanup().register(temp(CVS_REVS_DB), pass8)
787 def log_revision(self, c_rev):
788 """Add C_REV, a CVSRevision, to the database."""
789 self.cvs_revs_db[c_rev.unique_key()] = str(c_rev)
791 def get_revision(self, unique_key):
792 """Return the CVSRevision stored under UNIQUE_KEY."""
793 return CVSRevision(Ctx(), self.cvs_revs_db[unique_key])
796 class TagsDatabase(Database):
797 """A Database to store which symbolic names are tags.
798 Each key is a tag name.
799 The value has no meaning, and should be set to None."""
800 def __init__(self, mode):
801 Database.__init__(self, temp(TAGS_DB), mode)
802 Cleanup().register(temp(TAGS_DB), pass8)
805 class CVSRevision:
806 def __init__(self, ctx, *args):
807 """Initialize a new CVSRevision with Ctx object CTX, and ARGS.
809 If CTX is None, the following members and methods of the
810 instantiated CVSRevision class object will be unavailable (or
811 simply will not work correctly, if at all):
812 cvs_path
813 svn_path
814 svn_trunk_path
815 is_default_branch_revision()
817 (Note that this class treats CTX as const, because the caller
818 likely passed in a Borg instance of a Ctx. The reason this class
819 takes CTX as a parameter, instead of just instantiating a Ctx
820 itself, is that this class should be usable outside cvs2svn.)
822 If there is one argument in ARGS, it is a string, in the format of
823 a line from a revs file. Do *not* include a trailing newline.
825 If there are multiple ARGS, there must be 16 of them,
826 comprising a parsed revs line:
827 timestamp --> (int) date stamp for this cvs revision
828 digest --> (string) digest of author+logmsg
829 prev_timestamp --> (int) date stamp for the previous cvs revision
830 op --> (char) OP_ADD, OP_CHANGE, or OP_DELETE
831 prev_rev --> (string or None) previous CVS rev, e.g., "1.2"
832 rev --> (string) this CVS rev, e.g., "1.3"
833 next_rev --> (string or None) next CVS rev, e.g., "1.4"
834 file_in_attic --> (char or None) true if RCS file is in Attic
835 file_executable --> (char or None) true if RCS file has exec bit set.
836 file_size --> (int) size of the RCS file
837 deltatext_code --> (char) 'N' if non-empty deltatext, else 'E'
838 mode --> (string or None) "kkv", "kb", etc.
839 branch_name --> (string or None) branch on which this rev occurred
840 tags --> (list of strings) all tags on this revision
841 branches --> (list of strings) all branches rooted in this rev
842 fname --> (string) relative path of file in CVS repos
844 The two forms of initialization are equivalent."""
846 self._ctx = ctx
847 if len(args) == 16:
848 (self.timestamp, self.digest, self.prev_timestamp, self.op,
849 self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
850 self.file_executable, self.file_size, self.deltatext_code,
851 self.fname,
852 self.mode, self.branch_name, self.tags, self.branches) = args
853 elif len(args) == 1:
854 data = args[0].split(' ', 14)
855 (self.timestamp, self.digest, self.prev_timestamp, self.op,
856 self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
857 self.file_executable, self.file_size, self.deltatext_code,
858 self.mode, self.branch_name, numtags, remainder) = data
859 # Patch up data items which are not simple strings
860 self.timestamp = int(self.timestamp, 16)
861 if self.prev_timestamp == "*":
862 self.prev_timestamp = 0
863 else:
864 self.prev_timestamp = int(self.prev_timestamp)
865 if self.prev_rev == "*":
866 self.prev_rev = None
867 if self.next_rev == "*":
868 self.next_rev = None
869 if self.file_in_attic == "*":
870 self.file_in_attic = None
871 if self.file_executable == "*":
872 self.file_executable = None
873 self.file_size = int(self.file_size)
874 if self.mode == "*":
875 self.mode = None
876 if self.branch_name == "*":
877 self.branch_name = None
878 numtags = int(numtags)
879 tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
880 self.tags = tags_and_numbranches_and_remainder[:-2]
881 numbranches = int(tags_and_numbranches_and_remainder[-2])
882 remainder = tags_and_numbranches_and_remainder[-1]
883 branches_and_fname = remainder.split(' ', numbranches)
884 self.branches = branches_and_fname[:-1]
885 self.fname = branches_and_fname[-1]
886 else:
887 raise TypeError, 'CVSRevision() takes 2 or 17 arguments (%d given)' % \
888 (len(args) + 1)
889 if ctx is not None:
890 self.cvs_path = relative_name(self._ctx.cvsroot, self.fname[:-2])
891 self.svn_path = self._make_path(self.cvs_path, self.branch_name)
892 self.svn_trunk_path = self._make_path(self.cvs_path)
894 # The 'primary key' of a CVS Revision is the revision number + the
895 # filename. To provide a unique key (say, for a dict), we just glom
896 # them together in a string. By passing in self.prev_rev or
897 # self.next_rev, you can get the unique key for their respective
898 # CVSRevisions.
899 def unique_key(self, revnum=None):
900 if revnum is None:
901 revnum = self.rev
902 return revnum + "/" + self.fname
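# For example, for revision 1.3 of a (hypothetical) RCS file stored at
# /cvsroot/proj/foo.c,v, unique_key() returns '1.3//cvsroot/proj/foo.c,v'.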
904 def __str__(self):
905 return ('%08lx %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s' % (
906 self.timestamp, self.digest, self.prev_timestamp or "*", self.op,
907 (self.prev_rev or "*"), self.rev, (self.next_rev or "*"),
908 (self.file_in_attic or "*"), (self.file_executable or "*"),
909 self.file_size,
910 self.deltatext_code, (self.mode or "*"), (self.branch_name or "*"),
911 len(self.tags), self.tags and " " or "", " ".join(self.tags),
912 len(self.branches), self.branches and " " or "", " ".join(self.branches),
913 self.fname, ))
915 # Returns true if this CVSRevision is the opening CVSRevision for
916 # NAME (for this RCS file).
917 def opens_symbolic_name(self, name):
918 if name in self.tags:
919 return 1
920 if name in self.branches:
921 # If this c_rev opens a branch and our op is OP_DELETE, then
922 # that means that the file that this c_rev belongs to was
923 # created on the branch, so for all intents and purposes, this
924 # c_rev is *technically* not an opening. See Issue #62 for more
925 # information.
926 if self.op != OP_DELETE:
927 return 1
928 return 0
930 def is_default_branch_revision(self):
931 """Return 1 if SELF.rev of SELF.cvs_path is a default branch
932 revision according to DEFAULT_BRANCHES_DB (see the conditions
933 documented there), else return None."""
934 if self._ctx._default_branches_db.has_key(self.cvs_path):
935 val = self._ctx._default_branches_db[self.cvs_path]
936 val_last_dot = val.rindex(".")
937 our_last_dot = self.rev.rindex(".")
938 default_branch = val[:val_last_dot]
939 our_branch = self.rev[:our_last_dot]
940 default_rev_component = int(val[val_last_dot + 1:])
941 our_rev_component = int(self.rev[our_last_dot + 1:])
942 if (default_branch == our_branch
943 and our_rev_component <= default_rev_component):
944 return 1
945 # else
946 return None
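# For example, if DEFAULT_BRANCHES_DB maps this cvs_path to the
# (hypothetical) value '1.1.1.96', then revisions 1.1.1.1 through 1.1.1.96
# return 1 here, while 1.1.1.97 and 1.2 return None.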
948 def _make_path(self, path, branch_name = None):
949 """Return the trunk path or branch path for PATH.
951 If PATH is None, return None."""
952 # For a while, we treated each top-level subdir of the CVS
953 # repository as a "project root" and interpolated the appropriate
954 # genealogy (trunk|tag|branch) in according to the official
955 # recommended layout. For example, the path '/foo/bar/baz.c' on
956 # branch 'Rel2' would become
958 # /foo/branches/Rel2/bar/baz.c
960 # and on trunk it would become
962 # /foo/trunk/bar/baz.c
964 # However, we went back to the older and simpler method of just
965 # prepending the genealogy to the front, instead of interpolating.
966 # So now we produce:
968 # /branches/Rel2/foo/bar/baz.c
969 # /trunk/foo/bar/baz.c
971 # Why? Well, Jack Repenning pointed out that this way is much
972 # friendlier to "anonymously rooted subtrees" (that's a tree where
973 # the name of the top level dir doesn't matter, the point is that if
974 # you cd into it and, say, run 'make', something good will happen).
975 # By interpolating, we made it impossible to point cvs2svn at some
976 # subdir in the CVS repository and convert it as a project, because
977 # we'd treat every subdir underneath it as an independent project
978 # root, which is probably not what the user wanted.
980 # Also, see Blair Zajac's post
982 # http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=38965
984 # and the surrounding thread, for why what people really want is a
985 # way of specifying an in-repository prefix path, not interpolation.
986 if path is None:
987 return None
989 if branch_name:
990 branch_name = _clean_symbolic_name(branch_name)
991 return self._ctx.branches_base + '/' + branch_name + '/' + path
992 else:
993 return self._ctx.trunk_base + '/' + path
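# For example, assuming the conventional bases 'trunk' and 'branches',
# _make_path('proj/foo.c', 'Rel2') returns 'branches/Rel2/proj/foo.c' and
# _make_path('proj/foo.c') returns 'trunk/proj/foo.c'.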
995 def rcs_path(self):
996 """Returns the actual filesystem path to the RCS file of this
997 CVSRevision."""
998 if self.file_in_attic is None:
999 return self.fname
1000 else:
1001 basepath, filename = os.path.split(self.fname)
1002 return os.path.join(basepath, 'Attic', filename)
1004 def filename(self):
1005 "Return the last path component of self.fname, minus the ',v'"
1006 return os.path.split(self.fname)[-1][:-2]
1008 class SymbolDatabase:
1009 """This database records information on all symbols in the RCS
1010 files. It is created in pass 1 and it is used in pass 2."""
1011 def __init__(self):
1012 # A hash that maps tag names to commit counts
1013 self.tags = { }
1014 # A hash that maps branch names to lists of the format
1015 # [ create_count, commit_count, blockers ], where blockers
1016 # is a hash that lists the symbols that depend on the
1017 # branch. The blockers hash is used as a set, so the
1018 # values are not used.
1019 self.branches = { }
1021 def register_tag_creation(self, name):
1022 """Register the creation of the tag NAME."""
1023 if not self.tags.has_key(name):
1024 self.tags[name] = 0
1025 self.tags[name] += 1
1027 def _branch(self, name):
1028 """Helper function to get a branch node that will create and
1029 initialize the node if it does not exist."""
1030 if not self.branches.has_key(name):
1031 self.branches[name] = [ 0, 0, { } ]
1032 return self.branches[name]
1034 def register_branch_creation(self, name):
1035 """Register the creation of the branch NAME."""
1036 self._branch(name)[0] += 1
1038 def register_branch_commit(self, name):
1039 """Register a commit on the branch NAME."""
1040 self._branch(name)[1] += 1
1042 def register_branch_blocker(self, name, blocker):
1043 """Register BLOCKER as a blocker on the branch NAME."""
1044 self._branch(name)[2][blocker] = None
1046 def branch_has_commit(self, name):
1047 """Return non-zero if NAME has commits. Returns 0 if name
1048 is not a branch or if it has no commits."""
1049 return self.branches.has_key(name) and self.branches[name][1]
1051 def find_excluded_symbols(self, regexp_list):
1052 """Returns a hash of all symbols thaht match the regexps in
1053 REGEXP_LISTE. The hash is used as a set so the values are
1054 not used."""
1055 excludes = { }
1056 for tag in self.tags.keys():
1057 if match_regexp_list(regexp_list, tag):
1058 excludes[tag] = None
1059 for branch in self.branches.keys():
1060 if match_regexp_list(regexp_list, branch):
1061 excludes[branch] = None
1062 return excludes
1064 def find_branch_exclude_blockers(self, branch, excludes):
1065 """Find all blockers of BRANCH, excluding the ones in the hash
1066 EXCLUDES."""
1067 blockers = { }
1068 if excludes.has_key(branch):
1069 for blocker in self.branches[branch][2]:
1070 if not excludes.has_key(blocker):
1071 blockers[blocker] = None
1072 return blockers
1074 def find_blocked_excludes(self, excludes):
1075 """Find all branches not in EXCLUDES that have blocking symbols that
1076 are not themselves excluded. Return a hash that maps branch names
1077 to a hash of blockers. The hash of blockers is used as a set so the
1078 values are not used."""
1079 blocked_branches = { }
1080 for branch in self.branches.keys():
1081 blockers = self.find_branch_exclude_blockers(branch, excludes)
1082 if blockers:
1083 blocked_branches[branch] = blockers
1084 return blocked_branches
1086 def find_mismatches(self, excludes=None):
1087 """Find all symbols that are defined as both tags and branches,
1088 excluding the ones in EXCLUDES. Returns a list of 4-tuples with
1089 the symbol name, tag count, branch count and commit count."""
1090 if excludes is None:
1091 excludes = { }
1092 mismatches = [ ]
1093 for branch in self.branches.keys():
1094 if not excludes.has_key(branch) and self.tags.has_key(branch):
1095 mismatches.append((branch, # name
1096 self.tags[branch], # tag count
1097 self.branches[branch][0], # branch count
1098 self.branches[branch][1])) # commit count
1099 return mismatches
1101 def read(self):
1102 """Read the symbol database from files."""
1103 f = open(temp(TAGS_LIST))
1104 while 1:
1105 line = f.readline()
1106 if not line:
1107 break
1108 tag, count = line.split()
1109 self.tags[tag] = int(count)
1111 f = open(temp(BRANCHES_LIST))
1112 while 1:
1113 line = f.readline()
1114 if not line:
1115 break
1116 words = line.split()
1117 self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
1118 for blocker in words[3:]:
1119 self.branches[words[0]][2][blocker] = None
1121 def write(self):
1122 """Store the symbol database to files."""
1123 f = open(temp(TAGS_LIST), "w")
1124 Cleanup().register(temp(TAGS_LIST), pass2)
1125 for tag, count in self.tags.items():
1126 f.write("%s %d\n" % (tag, count))
1128 f = open(temp(BRANCHES_LIST), "w")
1129 Cleanup().register(temp(BRANCHES_LIST), pass2)
1130 for branch, info in self.branches.items():
1131 f.write("%s %d %d" % (branch, info[0], info[1]))
1132 if info[2]:
1133 f.write(" ")
1134 f.write(" ".join(info[2].keys()))
1135 f.write("\n")
1137 class CollectData(cvs2svn_rcsparse.Sink):
1138 def __init__(self):
1139 self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
1140 Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
1141 self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
1142 Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
1143 self.default_branches_db = Database(temp(DEFAULT_BRANCHES_DB), DB_OPEN_NEW)
1144 Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
1145 self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
1146 Cleanup().register(temp(METADATA_DB), pass8)
1147 self.fatal_errors = []
1148 self.num_files = 0
1149 self.symbol_db = SymbolDatabase()
1151 # 1 if we've collected data for at least one file, None otherwise.
1152 self.found_valid_file = None
1154 # See set_fname() for initializations of other variables.
1156 def set_fname(self, canonical_name, filename):
1157 """Prepare to receive data for FILENAME. FILENAME is the absolute
1158 filesystem path to the file in question, and CANONICAL_NAME is
1159 FILENAME with the 'Attic' component removed (if the file is indeed
1160 in the Attic)."""
1161 self.fname = canonical_name
1163 # We calculate and save some file metadata here, where we can do
1164 # it only once per file, instead of waiting until later where we
1165 # would have to do the same calculations once per CVS *revision*.
1167 self.rel_name = relative_name(Ctx().cvsroot, self.fname)[:-2]
1169 # If the paths are not the same, then that means that the
1170 # canonical_name has had the 'Attic' component stripped out.
1171 self.file_in_attic = None
1172 if not canonical_name == filename:
1173 self.file_in_attic = 1
1175 file_stat = os.stat(filename)
1176 # The size of our file in bytes
1177 self.file_size = file_stat[stat.ST_SIZE]
1179 # Whether or not the executable bit is set.
1180 self.file_executable = None
1181 if file_stat[0] & stat.S_IXUSR:
1182 self.file_executable = 1
1184 # revision -> [timestamp, author, old-timestamp]
1185 self.rev_data = { }
1187 # Maps revision number (key) to the revision number of the
1188 # previous revision along this line of development.
1190 # For the first revision R on a branch, we consider the revision
1191 # from which R sprouted to be the 'previous'.
1193 # Note that this revision can't be determined arithmetically (due
1194 # to cvsadmin -o, which is why this is necessary).
1195 self.prev_rev = { }
1197 # This dict is essentially self.prev_rev with the values mapped in
1198 # the other direction, so following key -> value will yield you
1199 # the next revision number
1200 self.next_rev = { }
1202 # Track the state of each revision so that in set_revision_info,
1203 # we can determine if our op is an add/change/delete. We can do
1204 # this because in set_revision_info, we'll have all of the
1205 # revisions for a file at our fingertips, and we need to examine
1206 # the state of our prev_rev to determine if we're an add or a
1207 # change--without the state of the prev_rev, we are unable to
1208 # distinguish between an add and a change.
1209 self.rev_state = { }
1211 # Hash mapping branch numbers, like '1.7.2', to branch names,
1212 # like 'Release_1_0_dev'.
1213 self.branch_names = { }
1215 # RCS flags (used for keyword expansion).
1216 self.mode = None
1218 # Hash mapping revision numbers, like '1.7', to lists of names
1219 # indicating which branches sprout from that revision, like
1220 # ['Release_1_0_dev', 'experimental_driver', ...].
1221 self.branchlist = { }
1223 # Like self.branchlist, but the values are lists of tag names that
1224 # apply to the key revision.
1225 self.taglist = { }
1227 # If set, this is an RCS branch number -- rcsparse calls this the
1228 # "principal branch", but CVS and RCS refer to it as the "default
1229 # branch", so that's what we call it, even though the rcsparse API
1230 # setter method is still 'set_principal_branch'.
1231 self.default_branch = None
1233 # If the RCS file doesn't have a default branch anymore, but does
1234 # have vendor revisions, then we make an educated guess that those
1235 # revisions *were* the head of the default branch up until the
1236 # commit of 1.2, at which point the file's default branch became
1237 # trunk. This records the date at which 1.2 was committed.
1238 self.first_non_vendor_revision_date = None
1240 # A list of all symbols defined for the current file. Used to
1241 # prevent multiple definitions of a symbol, something which can
1242 # easily happen when --symbol-transform is used.
1243 self.defined_symbols = [ ]
1245 def set_principal_branch(self, branch):
1246 self.default_branch = branch
1248 def set_expansion(self, mode):
1249 self.mode = mode
1251 def set_branch_name(self, branch_number, name):
1252 """Record that BRANCH_NUMBER is the branch number for branch NAME,
1253 and that NAME sprouts from BRANCH_NUMBER.
1254 BRANCH_NUMBER is an RCS branch number with an odd number of components,
1255 for example '1.7.2' (never '1.7.0.2')."""
1256 if not self.branch_names.has_key(branch_number):
1257 self.branch_names[branch_number] = name
1258 # The branchlist is keyed on the revision number from which the
1259 # branch sprouts, so strip off the odd final component.
1260 sprout_rev = branch_number[:branch_number.rfind(".")]
1261 if not self.branchlist.has_key(sprout_rev):
1262 self.branchlist[sprout_rev] = []
1263 self.branchlist[sprout_rev].append(name)
1264 self.symbol_db.register_branch_creation(name)
1265 else:
1266 sys.stderr.write("%s: in '%s':\n"
1267 " branch '%s' already has name '%s',\n"
1268 " cannot also have name '%s', ignoring the latter\n"
1269 % (warning_prefix, self.fname, branch_number,
1270 self.branch_names[branch_number], name))
1272 def rev_to_branch_name(self, revision):
1273 """Return the name of the branch on which REVISION lies.
1274 REVISION is a non-branch revision number with an even number of
1275 components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
1276 For the convenience of callers, REVISION can also be a trunk
1277 revision such as '1.2', in which case just return None."""
1278 if trunk_rev.match(revision):
1279 return None
1280 return self.branch_names.get(revision[:revision.rindex(".")])
1282 def add_cvs_branch(self, revision, branch_name):
1283 """Record the root revision and branch revision for BRANCH_NAME,
1284 based on REVISION. REVISION is a CVS branch number having an even
1285 number of components where the second-to-last is '0'. For
1286 example, if it's '1.7.0.2', then record that BRANCH_NAME sprouts
1287 from 1.7 and has branch number 1.7.2."""
1288 last_dot = revision.rfind(".")
1289 branch_rev = revision[:last_dot]
1290 last2_dot = branch_rev.rfind(".")
1291 branch_rev = branch_rev[:last2_dot] + revision[last_dot:]
1292 self.set_branch_name(branch_rev, branch_name)
1294 def define_tag(self, name, revision):
1295 """Record a bidirectional mapping between symbolic NAME and REVISION.
1296 REVISION is an unprocessed revision number from the RCS file's
1297 header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
1298 This function will determine what kind of symbolic name it is by
1299 inspection, and record it in the right places."""
1300 for (pattern, replacement) in Ctx().symbol_transforms:
1301 newname = re.sub(pattern, replacement, name)
1302 if newname != name:
1303 Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
1304 % (name, newname))
1305 name = newname
1306 if name in self.defined_symbols:
1307 err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
1308 % (error_prefix, name, self.fname)
1309 sys.stderr.write(err + "\n")
1310 self.fatal_errors.append(err)
1311 self.defined_symbols.append(name)
1312 if branch_tag.match(revision):
1313 self.add_cvs_branch(revision, name)
1314 elif vendor_tag.match(revision):
1315 self.set_branch_name(revision, name)
1316 else:
1317 if not self.taglist.has_key(revision):
1318 self.taglist[revision] = []
1319 self.taglist[revision].append(name)
1320 self.symbol_db.register_tag_creation(name)
1322 def define_revision(self, revision, timestamp, author, state,
1323 branches, next):
1325 # Record the state of our revision for later calculations
1326 self.rev_state[revision] = state
1328 # store the rev_data as a list in case we have to jigger the timestamp
1329 self.rev_data[revision] = [int(timestamp), author, None]
1331 # When on trunk, the RCS 'next' revision number points to what
1332 # humans might consider to be the 'previous' revision number. For
1333 # example, 1.3's RCS 'next' is 1.2.
1335 # However, on a branch, the RCS 'next' revision number really does
1336 # point to what humans would consider to be the 'next' revision
1337 # number. For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
1339 # In other words, in RCS, 'next' always means "where to find the next
1340 # deltatext that you need this revision to retrieve."
1342 # That said, we don't *want* RCS's behavior here, so we determine
1343 # whether we're on trunk or a branch and set self.prev_rev
1344 # accordingly.
1346 # One last thing. Note that if REVISION is a branch revision,
1347 # instead of mapping REVISION to NEXT, we instead map NEXT to
1348 # REVISION. Since we loop over all revisions in the file before
1349 # doing anything with the data we gather here, this 'reverse
1350 # assignment' effectively does the following:
1352 # 1. Gives us no 'prev' value for REVISION (in this
1353 # iteration... it may have been set in a previous iteration)
1355 # 2. Sets the 'prev' value for the revision with number NEXT to
1356 # REVISION. So when we come around to the branch revision whose
1357 # revision value is NEXT, its 'prev' and 'prev_rev' are already
1358 # set.
1359 if trunk_rev.match(revision):
1360 self.prev_rev[revision] = next
1361 self.next_rev[next] = revision
1362 elif next:
1363 self.prev_rev[next] = revision
1364 self.next_rev[revision] = next
1366 for b in branches:
1367 self.prev_rev[b] = revision
1369 # Ratchet up the highest vendor head revision, if necessary.
1370 if self.default_branch:
1371 default_branch_root = self.default_branch + "."
1372 if ((revision.find(default_branch_root) == 0)
1373 and (default_branch_root.count('.') == revision.count('.'))):
1374 # This revision is on the default branch, so record that it is
1375 # the new highest default branch head revision.
1376 self.default_branches_db[self.rel_name] = revision
1377 else:
1378 # No default branch, so make an educated guess.
1379 if revision == '1.2':
1380 # This is probably the time when the file stopped having a
1381 # default branch, so make a note of it.
1382 self.first_non_vendor_revision_date = timestamp
1383 else:
1384 m = vendor_revision.match(revision)
1385 if m and ((not self.first_non_vendor_revision_date)
1386 or (timestamp < self.first_non_vendor_revision_date)):
1387 # We're looking at a vendor revision, and it wasn't
1388 # committed after this file lost its default branch, so bump
1389 # the maximum trunk vendor revision in the permanent record.
1390 self.default_branches_db[self.rel_name] = revision
1392 if not trunk_rev.match(revision):
1393 # Check for unlabeled branches, record them. We tried to collect
1394 # all branch names when we parsed the symbolic name header
1395 # earlier, of course, but that didn't catch unlabeled branches.
1396 # If a branch is unlabeled, this is our first encounter with it,
1397 # so we have to record its data now.
1398 branch_number = revision[:revision.rindex(".")]
1399 if not self.branch_names.has_key(branch_number):
1400 branch_name = "unlabeled-" + branch_number
1401 self.set_branch_name(branch_number, branch_name)
1403 # Register the commit on this non-trunk branch
1404 branch_name = self.branch_names[branch_number]
1405 self.symbol_db.register_branch_commit(branch_name)
1407 def tree_completed(self):
1408 "The revision tree has been parsed. Analyze it for consistency."
1410 # Our algorithm depends upon the timestamps on the revisions occurring
1411 # monotonically over time. That is, we want to see rev 1.34 occur in
1412 # time before rev 1.35. If we inserted 1.35 *first* (due to the time-
1413 # sorting), and then tried to insert 1.34, we'd be screwed.
1415 # to perform the analysis, we'll simply visit all of the 'previous'
1416 # links that we have recorded and validate that the timestamp on the
1417 # previous revision is before the specified revision
1419 # if we have to resync some nodes, then we restart the scan. just keep
1420 # looping as long as we need to restart.
1421 while 1:
1422 for current, prev in self.prev_rev.items():
1423 if not prev:
1424 # no previous revision exists (i.e. the initial revision)
1425 continue
1426 t_c = self.rev_data[current][0]
1427 t_p = self.rev_data[prev][0]
1428 if t_p >= t_c:
1429 # the previous revision occurred later than the current revision.
1430 # shove the previous revision back in time (and any before it that
1431 # may need to shift).
1433 # We sync backwards and not forwards because any given CVS
1434 # Revision has only one previous revision. However, a CVS
1435 # Revision can *be* a previous revision for many other
1436 # revisions (e.g., a revision that is the source of multiple
1437 # branches). This becomes relevant when we do the secondary
1438 # synchronization in pass 2--we can make certain that we
1439 # don't resync a revision earlier than its previous
1440 # revision, but it would be non-trivial to make sure that we
1441 # don't resync revision R *after* any revisions that have R
1442 # as a previous revision.
1443 while t_p >= t_c:
1444 self.rev_data[prev][0] = t_c - 1 # new timestamp
1445 self.rev_data[prev][2] = t_p # old timestamp
1446 delta = t_c - 1 - t_p
1447 msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
1448 % (self.rel_name,
1449 prev, time.ctime(t_p), delta)
1450 Log().write(LOG_VERBOSE, msg)
1451 if (delta > COMMIT_THRESHOLD
1452 or delta < (COMMIT_THRESHOLD * -1)):
1453 str = "%s: Significant timestamp change for '%s' (%d seconds)"
1454 Log().write(LOG_WARN, str % (warning_prefix, self.rel_name,
1455 delta))
1456 current = prev
1457 prev = self.prev_rev[current]
1458 if not prev:
1459 break
1460 t_c = t_c - 1 # self.rev_data[current][0]
1461 t_p = self.rev_data[prev][0]
1463 # break from the for-loop
1464 break
1465 else:
1466 # finished the for-loop (no resyncing was performed)
1467 return
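# A small illustration of the resync above: if revision 1.2 carries
# timestamp 100 while its previous revision 1.1 carries timestamp 105,
# then 1.1 is pushed back to 99 (one second before 1.2) and its old
# timestamp 105 is kept in rev_data so the change can be propagated to
# other files' revisions in pass 2.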
1469 def set_revision_info(self, revision, log, text):
1470 timestamp, author, old_ts = self.rev_data[revision]
1471 digest = sha.new(log + '\0' + author).hexdigest()
1472 if old_ts:
1473 # the timestamp on this revision was changed. log it for later
1474 # resynchronization of other files' revisions that occurred
1475 # for this time and log message.
1476 self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))
1478 # "...Give back one kadam to honor the Hebrew God whose Ark this is."
1479 # -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
1481 # If revision 1.1 appears to have been created via 'cvs add'
1482 # instead of 'cvs import', then this file probably never had a
1483 # default branch, so retroactively remove its record in the
1484 # default branches db. The test is that the log message CVS uses
1485 # for 1.1 in imports is "Initial revision\n" with no period.
1486 if revision == '1.1' and log != 'Initial revision\n':
1487 if self.default_branches_db.has_key(self.rel_name):
1488 del self.default_branches_db[self.rel_name]
1490 # Get the timestamp of the previous revision
1491 prev_rev = self.prev_rev.get(revision, None)
1492 prev_timestamp, ign, ign = self.rev_data.get(prev_rev, [0, None, None])
1494 # How to tell if a CVSRevision is an add, a change, or a deletion:
1496 # It's a delete if the RCS state is 'dead'.
1498 # It's an add if the RCS state is 'Exp' and
1499 # - we either have no previous revision
1500 # or
1501 # - we have a previous revision whose state is 'dead'.
1503 # Anything else is a change.
1504 if self.rev_state[revision] == 'dead':
1505 op = OP_DELETE
1506 elif ((self.prev_rev.get(revision, None) is None)
1507 or (self.rev_state[self.prev_rev[revision]] == 'dead')):
1508 op = OP_ADD
1509 else:
1510 op = OP_CHANGE
1512 if text:
1513 deltatext_code = DELTATEXT_NONEMPTY
1514 else:
1515 deltatext_code = DELTATEXT_EMPTY
1517 c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp, op,
1518 self.prev_rev[revision], revision,
1519 self.next_rev.get(revision),
1520 self.file_in_attic, self.file_executable,
1521 self.file_size,
1522 deltatext_code, self.fname,
1523 self.mode, self.rev_to_branch_name(revision),
1524 self.taglist.get(revision, []),
1525 self.branchlist.get(revision, []))
1526 self.revs.write(str(c_rev) + "\n")
1527 StatsKeeper().record_c_rev(c_rev)
1529 if not self.metadata_db.has_key(digest):
1530 self.metadata_db[digest] = (author, log)
1532 def parse_completed(self):
1533 # Walk through all branches and tags and register them with
1534 # their parent branch in the symbol database.
1535 for revision, symbols in self.taglist.items() + self.branchlist.items():
1536 for symbol in symbols:
1537 name = self.rev_to_branch_name(revision)
1538 if name is not None:
1539 self.symbol_db.register_branch_blocker(name, symbol)
1541 self.num_files = self.num_files + 1
1543 def write_symbol_db(self):
1544 self.symbol_db.write()
1546 class SymbolingsLogger:
1547 """Manage the file that contains lines for symbol openings and
1548 closings.
1550 This data will later be used to determine valid SVNRevision ranges
1551 from which a file can be copied when creating a branch or tag in
1552 Subversion. Do this by finding "Openings" and "Closings" for each
1553 file copied onto a branch or tag.
1555 An "Opening" is the CVSRevision from which a given branch/tag
1556 sprouts on a path.
1558 The "Closing" for that branch/tag and path is the next CVSRevision
1559 on the same line of development as the opening.
1561 For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
1562 obviously sprouts from revision 1.2. Therefore, 1.2 is the opening
1563 for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
1564 'foo.c'. Note that there may be many revisions chronologically
1565 between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
1566 perhaps even including on branch BEE itself. But 1.3 is the next
1567 revision *on the same line* as 1.2; that is why it is the closing
1568 revision for those symbolic names of which 1.2 is the opening.
1570 The reason for doing all this hullabaloo is to make branch and tag
1571 creation as efficient as possible by minimizing the number of copies
1572 and deletes per creation. For example, revisions 1.2 and 1.3 of
1573 foo.c might correspond to revisions 17 and 30 in Subversion. That
1574 means that when creating branch BEE, there is some motivation to do
1575 the copy from one of 17-30. Now if there were another file,
1576 'bar.c', whose opening and closing CVSRevisions for BEE corresponded
1577 to revisions 24 and 39 in Subversion, we would know that the ideal
1578 thing would be to copy the branch from somewhere between 24 and 29,
1579 inclusive."""
1581 def __init__(self):
1582 self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
1583 Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
1584 self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
1585 Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)
1587 # The keys of this dictionary are Subversion repository *source*
1588 # paths for which we've encountered an 'opening'. The values are
1589 # the symbolic names that this path has opened. The only paths
1590 # that should be in this dict are paths whose corresponding
1591 # CVSRevision is a default branch revision.
1592 self.open_paths_with_default_branches = { }
1594 def log_revision(self, c_rev, svn_revnum):
1595 """Log any openings found in C_REV, and if C_REV.next_rev is not
1596 None, a closing. The opening uses SVN_REVNUM, but the closing (if
1597 any) will have its revnum determined later."""
1598 for name in c_rev.tags + c_rev.branches:
1599 name = _clean_symbolic_name(name)
1600 self._note_default_branch_opening(c_rev, name)
1601 if c_rev.op != OP_DELETE:
1602 self._log(name, svn_revnum, c_rev.svn_path, OPENING)
1604 # If our c_rev has a next_rev, then that's the closing rev for
1605 # this source revision. Log it to closings for later processing
1606 # since we don't know the svn_revnum yet.
1607 if c_rev.next_rev is not None:
1608 self.closings.write('%s %s\n' %
1609 (name, c_rev.unique_key(c_rev.next_rev)))
1611 def _log(self, name, svn_revnum, svn_path, type):
1612 """Write out a single line to the symbol_openings_closings file
1613 representing that svn_revnum of svn_path is either the opening or
1614 closing (TYPE) of NAME (a symbolic name).
1616 TYPE should only be one of the following global constants:
1617 OPENING or CLOSING."""
1618 # 8 places gives us 999,999,999 SVN revs. That *should* be enough.
1619 self.symbolings.write('%s %.8d %s %s\n' % (name, svn_revnum,
1620 type, svn_path))
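# For illustration, each line written here has the form
#   <name> <8-digit revnum> <type> <svn_path>
# e.g. (hypothetical values, with TYPE shown symbolically since the
# OPENING/CLOSING constants are defined elsewhere in this file):
#   BEE 00000017 <OPENING> trunk/proj/foo.c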
1622 def close(self):
1623 """Iterate through the closings file, lookup the svn_revnum for
1624 each closing CVSRevision, and write a proper line out to the
1625 symbolings file."""
1626 # Use this to get the c_rev.svn_path of our rev_key
1627 cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)
1629 self.closings.close()
1630 for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
1631 (name, rev_key) = line.rstrip().split(" ", 1)
1632 svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)
1634 c_rev = cvs_revs_db.get_revision(rev_key)
1635 self._log(name, svn_revnum, c_rev.svn_path, CLOSING)
1637 self.symbolings.close()
1639 def _note_default_branch_opening(self, c_rev, symbolic_name):
1640 """If C_REV is a default branch revision, log C_REV.svn_trunk_path
1641 as an opening for SYMBOLIC_NAME."""
1642 path = c_rev.svn_trunk_path
1643 if not self.open_paths_with_default_branches.has_key(path):
1644 self.open_paths_with_default_branches[path] = [ ]
1645 self.open_paths_with_default_branches[path].append(symbolic_name)
1647 def log_default_branch_closing(self, c_rev, svn_revnum):
1648 """If self.open_paths_with_default_branches contains
1649 C_REV.svn_trunk_path, then call log each name in
1650 self.open_paths_with_default_branches[C_REV.svn_trunk_path] as a
1651 closing with SVN_REVNUM as the closing revision number. """
1652 path = c_rev.svn_trunk_path
1653 if self.open_paths_with_default_branches.has_key(path):
1654 # log each symbol as a closing
1655 for name in self.open_paths_with_default_branches[path]:
1656 self._log(name, svn_revnum, path, CLOSING)
1657 # Remove them from the openings list as we're done with them.
1658 del self.open_paths_with_default_branches[path]
1661 class PersistenceManager:
1662 """The PersistenceManager allows us to effectively store SVNCommits
1663 to disk and retrieve them later using only their subversion revision
1664 number as the key. It also returns the subversion revision number
1665 for a given CVSRevision's unique key.
1667 All information pertinent to each SVNCommit is stored in a series of
1668 on-disk databases so that SVNCommits can be retrieved on-demand.
1670 MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
1671 In 'new' mode, PersistenceManager will initialize a new set of on-disk
1672 databases and be fully-featured.
1673 In 'read' mode, PersistenceManager will open existing on-disk databases
1674 and the set_* methods will be unavailable."""
1675 def __init__(self, mode):
1676 self.mode = mode
1677 if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
1678 raise RuntimeError, "Invalid 'mode' argument to PersistenceManager"
1679 self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
1680 Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
1681 self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
1682 Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
1683 self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES), mode)
1684 Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)
1685 self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
1686 self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
1687 ###PERF kff Elsewhere there are comments about sucking the tags db
1688 ### into memory. That seems like a good idea.
1689 if not Ctx().trunk_only:
1690 self.tags_db = TagsDatabase(DB_OPEN_READ)
1691 self.motivating_revnums = Database(temp(MOTIVATING_REVNUMS), mode)
1692 Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)
1694 # "branch_name" -> svn_revnum in which branch was last filled.
1695 # This is used by CVSCommit._pre_commit, to prevent creating a fill
1696 # revision which would have nothing to do.
1697 self.last_filled = {}
1699 def get_svn_revnum(self, cvs_rev_unique_key):
1700 """Return the Subversion revision number in which
1701 CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
1702 is no mapping for CVS_REV_UNIQUE_KEY."""
1703 return int(self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM))
1705 def get_svn_commit(self, svn_revnum):
1706 """Return an SVNCommit that corresponds to SVN_REVNUM.
1708 If no SVNCommit exists for revnum SVN_REVNUM, then return None.
1710 This method can throw SVNCommitInternalInconsistencyError."""
1712 svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
1713 c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
1714 if c_rev_keys is None:
1715 return None
1717 digest = None
1718 for key in c_rev_keys:
1719 c_rev = self.cvs_revisions.get_revision(key)
1720 svn_commit.add_revision(c_rev)
1721 # Set the author and log message for this commit by using
1722 # CVSRevision metadata, but only if we haven't done so already.
1723 if digest is None:
1724 digest = c_rev.digest
1725 author, log_msg = self.svn_commit_metadata[digest]
1726 svn_commit.set_author(author)
1727 svn_commit.set_log_msg(log_msg)
1729 # If we're doing a trunk-only conversion, we don't need to do any more work.
1730 if Ctx().trunk_only:
1731 return svn_commit
1733 name, date = self._get_name_and_date(svn_revnum)
1734 if name:
1735 svn_commit.set_symbolic_name(name)
1736 svn_commit.set_date(date)
1737 if self.tags_db.has_key(name):
1738 svn_commit.is_tag = 1
1740 motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
1741 if motivating_revnum:
1742 svn_commit.set_motivating_revnum(int(motivating_revnum))
1743 svn_commit.set_date(date)
1745 if len(svn_commit.cvs_revs) and name:
1746 msg = """An SVNCommit cannot have cvs_revisions *and* a
1747 corresponding symbolic name ('%s') to fill.""" % name
1748 raise SVNCommit.SVNCommitInternalInconsistencyError(msg)
1750 return svn_commit
1752 def set_cvs_revs(self, svn_revnum, cvs_revs):
1753 """Record the bidirectional mapping between SVN_REVNUM and
1754 CVS_REVS."""
1755 if self.mode == DB_OPEN_READ:
1756 raise RuntimeError, \
1757 'Write operation attempted on read-only PersistenceManager'
1758 for c_rev in cvs_revs:
1759 Log().write(LOG_VERBOSE, " ", c_rev.unique_key())
1760 self.svn2cvs_db[str(svn_revnum)] = [x.unique_key() for x in cvs_revs]
1761 for c_rev in cvs_revs:
1762 self.cvs2svn_db[c_rev.unique_key()] = svn_revnum
1764 def set_name_and_date(self, svn_revnum, name, date):
1765 """Associate symbolic name NAME and DATE with SVN_REVNUM."""
1766 if self.mode == DB_OPEN_READ:
1767 raise RuntimeError, \
1768 'Write operation attempted on read-only PersistenceManager'
1769 self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
1770 self.last_filled[name] = svn_revnum
1772 def _get_name_and_date(self, svn_revnum):
1773 """Return a tuple containing the symbolic name and date associated
1774 with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
1775 associated with it."""
1776 return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))
1778 def set_motivating_revnum(self, svn_revnum, motivating_revnum):
1779 """Store MOTIVATING_REVNUM as the value of SVN_REVNUM"""
1780 if self.mode == DB_OPEN_READ:
1781 raise RuntimeError, \
1782 'Write operation attempted on read-only PersistenceManager'
1783 self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
1786 class CVSCommit:
1787 """Each instance of this class contains a number of CVS Revisions
1788 that correspond to one or more Subversion Commits. After all CVS
1789 Revisions are added to the grouping, calling process_revisions will
1790 generate a Subversion Commit (or Commits) for the set of CVS
1791 Revisions in the grouping."""
1793 def __init__(self, digest, author, log):
1794 self.digest = digest
1795 self.author = author
1796 self.log = log
1798 # Symbolic names for which the last source revision has already
1799 # been seen and for which the CVSRevisionAggregator has already
1800 # generated a fill SVNCommit. See self.process_revisions().
1801 self.done_symbols = [ ]
1803 self.files = { }
1804 # Lists of CVSRevisions
1805 self.changes = [ ]
1806 self.deletes = [ ]
1808 # Start out with a t_min higher than any incoming time T, and a
1809 # t_max lower than any incoming T. This way the first T will
1810 # push t_min down to T, and t_max up to T, naturally (without any
1811 # special-casing), and successive times will then ratchet them
1812 # outward as appropriate.
1813 self.t_min = 1L<<32
1814 self.t_max = 0
1816 # This will be set to the SVNCommit that occurs in self._commit.
1817 self.motivating_commit = None
1819 # This is a list of all non-primary commits motivated by the main
1820 # commit. We gather these so that we can set their dates to the
1821 # same date as the primary commit.
1822 self.secondary_commits = [ ]
1824 # State for handling default branches.
1826 # Here is a tempting, but ultimately nugatory, bit of logic, which
1827 # I share with you so you may appreciate the less attractive, but
1828 # refreshingly non-nugatory, logic which follows it:
1830 # If some of the commits in this txn happened on a non-trunk
1831 # default branch, then those files will have to be copied into
1832 # trunk manually after being changed on the branch (because the
1833 # RCS "default branch" appears as head, i.e., trunk, in practice).
1834 # As long as those copies don't overwrite any trunk paths that
1835 # were also changed in this commit, then we can do the copies in
1836 # the same revision, because they won't cover changes that don't
1837 # appear anywhere/anywhen else. However, if some of the trunk dst
1838 # paths *did* change in this commit, then immediately copying the
1839 # branch changes would lose those trunk mods forever. So in this
1840 # case, we need to do at least that copy in its own revision. And
1841 # for simplicity's sake, if we're creating the new revision for
1842 # even one file, then we just do all such copies together in the
1843 # new revision.
1845 # Doesn't that sound nice?
1847 # Unfortunately, Subversion doesn't support copies with sources
1848 # in the current txn. All copies must be based in committed
1849 # revisions. Therefore, we generate the above-described new
1850 # revision unconditionally.
1852 # This is a list of c_revs, and a c_rev is appended for each
1853 # default branch commit that will need to be copied to trunk (or
1854 # deleted from trunk) in some generated revision following the
1855 # "regular" revision.
1856 self.default_branch_cvs_revisions = [ ]
1858 def __cmp__(self, other):
1859 # Commits should be sorted by t_max. If both self and other have
1860 # the same t_max, break the tie using t_min, and lastly, digest
1861 return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
1862 or cmp(self.digest, other.digest))
1864 def has_file(self, fname):
1865 return self.files.has_key(fname)
1867 def revisions(self):
1868 return self.changes + self.deletes
1870 def opens_symbolic_name(self, name):
1871 """Returns true if any CVSRevision in this commit is on a tag or a
1872 branch or is the origin of a tag or branch."""
1873 for c_rev in self.revisions():
1874 if c_rev.opens_symbolic_name(name):
1875 return 1
1876 return 0
1878 def add_revision(self, c_rev):
1879 # Record the time range of this commit.
1881 # ### ISSUE: It's possible, though unlikely, that the time range
1882 # of a commit could get gradually expanded to be arbitrarily
1883 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
1884 # problem, and anyway deciding where to break it up would be a
1885 # judgement call. For now, we just print a warning in commit() if
1886 # this happens.
1887 if c_rev.timestamp < self.t_min:
1888 self.t_min = c_rev.timestamp
1889 if c_rev.timestamp > self.t_max:
1890 self.t_max = c_rev.timestamp
1892 if c_rev.op == OP_DELETE:
1893 self.deletes.append(c_rev)
1894 else:
1895 # OP_CHANGE or OP_ADD
1896 self.changes.append(c_rev)
1898 self.files[c_rev.fname] = 1
1900 def _pre_commit(self):
1901 """Generates any SVNCommits that must exist before the main
1902 commit."""
1904 # There may be multiple c_revs in this commit that would cause
1905 # branch B to be filled, but we only want to fill B once. On the
1906 # other hand, there might be multiple branches committed on in
1907 # this commit. Whatever the case, we should count exactly one
1908 # commit per branch, because we only fill a branch once per
1909 # CVSCommit. This list tracks which branches we've already
1910 # counted.
1911 accounted_for_sym_names = [ ]
1913 def fill_needed(c_rev, pm):
1914 """Return 1 if this is the first commit on a new branch (for
1915 this file) and we need to fill the branch; else return 0
1916 (meaning that some other file's first commit on the branch has
1917 already done the fill for us).
1919 If C_REV.op is OP_ADD, only return 1 if the branch that this
1920 commit is on has no last filled revision.
1922 PM is a PersistenceManager to query."""
1925 # Different '.' counts indicate that c_rev is now on a different
1926 # line of development (and may need a fill)
1927 if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
1928 svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
1929 # It should be the case that when we have a file F that
1930 # is added on branch B (thus, F on trunk is in state
1931 # 'dead'), we generate an SVNCommit to fill B iff the branch
1932 # has never been filled before.
1934 # If this c_rev.op == OP_ADD, *and* the branch has never
1935 # been filled before, then fill it now. Otherwise, no need to
1936 # fill it.
1937 if c_rev.op == OP_ADD:
1938 if pm.last_filled.get(c_rev.branch_name, None) is None:
1939 return 1
1940 else:
1941 if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
1942 return 1
1943 return 0
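# Illustrative case (hypothetical revision numbers): a c_rev for
# 1.2.2.1 whose prev_rev is 1.2 has a different '.' count, so
# fill_needed consults last_filled for that branch; a c_rev for
# 1.2.2.2 whose prev_rev is 1.2.2.1 has the same count and never
# triggers a fill.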
1945 for c_rev in self.changes + self.deletes:
1946 # If a commit is on a branch, we must ensure that the branch
1947 # path being committed exists (in HEAD of the Subversion
1948 # repository). If it doesn't exist, we will need to fill the
1949 # branch. After the fill, the path on which we're committing
1950 # will exist.
1951 if c_rev.branch_name \
1952 and c_rev.branch_name not in accounted_for_sym_names \
1953 and c_rev.branch_name not in self.done_symbols \
1954 and fill_needed(c_rev, Ctx()._persistence_manager):
1955 svn_commit = SVNCommit("pre-commit symbolic name '%s'"
1956 % c_rev.branch_name)
1957 svn_commit.set_symbolic_name(c_rev.branch_name)
1958 self.secondary_commits.append(svn_commit)
1959 accounted_for_sym_names.append(c_rev.branch_name)
1961 def _commit(self):
1962 """Generates the primary SVNCommit that corresponds the this
1963 CVSCommit."""
1964 # Generate an SVNCommit unconditionally. Even if the only change
1965 # in this CVSCommit is a deletion of an already-deleted file (that
1966 # is, a CVS revision in state 'dead' whose predecessor was also in
1967 # state 'dead'), the conversion will still generate a Subversion
1968 # revision containing the log message for the second dead
1969 # revision, because we don't want to lose that information.
1970 svn_commit = SVNCommit("commit")
1971 self.motivating_commit = svn_commit
1973 for c_rev in self.changes:
1974 svn_commit.add_revision(c_rev)
1975 # Only make a change if we need to. When 1.1.1.1 has an empty
1976 # deltatext, the explanation is almost always that we're looking
1977 # at an imported file whose 1.1 and 1.1.1.1 are identical. On
1978 # such imports, CVS creates an RCS file where 1.1 has the
1979 # content, and 1.1.1.1 has an empty deltatext, i.e., the same
1980 # content as 1.1. There's no reason to reflect this non-change
1981 # in the repository, so we want to do nothing in this case. (If
1982 # we were really paranoid, we could make sure 1.1's log message
1983 # is the CVS-generated "Initial revision\n", but I think the
1984 # conditions below are strict enough.)
1985 if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
1986 and (c_rev.rev == "1.1.1.1")):
1987 if c_rev.is_default_branch_revision():
1988 self.default_branch_cvs_revisions.append(c_rev)
1990 for c_rev in self.deletes:
1991 # When a file is added on a branch, CVS not only adds the file
1992 # on the branch, but generates a trunk revision (typically
1993 # 1.1) for that file in state 'dead'. We only want to add
1994 # this revision if the log message is not the standard cvs
1995 # fabricated log message.
1996 if c_rev.prev_rev is None:
1997 # c_rev.branches may be empty if the originating branch
1998 # has been excluded.
1999 if not c_rev.branches:
2000 continue
2001 cvs_generated_msg = ('file %s was initially added on branch %s.\n'
2002 % (c_rev.filename(),
2003 c_rev.branches[0]))
2004 author, log_msg = \
2005 Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
2006 if log_msg == cvs_generated_msg:
2007 continue
2009 svn_commit.add_revision(c_rev)
2010 if c_rev.is_default_branch_revision():
2011 self.default_branch_cvs_revisions.append(c_rev)
2013 # There is a slight chance that we didn't actually register any
2014 # CVSRevisions with our SVNCommit (see loop over self.deletes
2015 # above), so if we have no CVSRevisions, we skip flushing the
2016 # svn_commit to disk and instead roll back our revision counter.
2017 if len(svn_commit.cvs_revs) > 0:
2018 svn_commit.flush()
2019 else:
2020 # We will not be flushing this SVNCommit, so rollback the
2021 # SVNCommit revision counter.
2022 SVNCommit.revnum = SVNCommit.revnum - 1
2024 if not Ctx().trunk_only:
2025 for c_rev in self.revisions():
2026 Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
2028 def _post_commit(self):
2029 """Generates any SVNCommits that we can perform now that _commit
2030 has happened. That is, handle non-trunk default branches.
2031 Sometimes an RCS file has a non-trunk default branch, so a commit
2032 on that default branch would be visible in a default CVS checkout
2033 of HEAD. If we don't copy that commit over to Subversion's trunk,
2034 then there will be no Subversion tree which corresponds to that
2035 CVS checkout. Of course, in order to copy the path over, we may
2036 first need to delete the existing trunk there. """
2038 # Only generate a commit if we have default branch revs
2039 if len(self.default_branch_cvs_revisions):
2040 # Generate an SVNCommit for all of our default branch c_revs.
2041 svn_commit = SVNCommit("post-commit default branch(es)")
2042 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
2043 for c_rev in self.default_branch_cvs_revisions:
2044 svn_commit.add_revision(c_rev)
2045 Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
2046 svn_commit.revnum)
2047 self.secondary_commits.append(svn_commit)
2049 def process_revisions(self, done_symbols):
2050 """Process all the CVSRevisions that this instance has, creating
2051 one or more SVNCommits in the process. Generate fill SVNCommits
2052 only for symbols not in DONE_SYMBOLS (avoids unnecessary
2053 fills).
2055 Return the primary SVNCommit that corresponds to this CVSCommit.
2056 The returned SVNCommit is the commit that motivated any other
2057 SVNCommits generated in this CVSCommit."""
2058 self.done_symbols = done_symbols
2059 seconds = self.t_max - self.t_min + 1
2061 Log().write(LOG_VERBOSE, '-' * 60)
2062 Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
2063 if seconds == 1:
2064 Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
2065 % time.ctime(self.t_max))
2066 else:
2067 Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
2068 Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
2069 % (time.ctime(self.t_max), seconds))
2071 if seconds > COMMIT_THRESHOLD + 1:
2072 Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
2073 % (warning_prefix, COMMIT_THRESHOLD))
2075 if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
2076 self._commit()
2077 return self.motivating_commit
2079 self._pre_commit()
2080 self._commit()
2081 self._post_commit()
2083 for svn_commit in self.secondary_commits:
2084 svn_commit.set_date(self.motivating_commit.get_date())
2085 svn_commit.flush()
2087 return self.motivating_commit
2090 class SVNCommit:
2091 """This represents one commit to the Subversion Repository. There
2092 are three types of SVNCommits:
2094 1. Commits one or more CVSRevisions (cannot fill a symbolic name).
2096 2. Creates or fills a symbolic name (cannot commit CVSRevisions).
2098 3. Updates trunk to reflect the contents of a particular branch
2099 (this is to handle RCS default branches)."""
2101 # The revision number to assign to the next new SVNCommit.
2102 # We start at 2 because SVNRepositoryMirror uses the first commit
2103 # to create trunk, tags, and branches.
2104 revnum = 2
2106 class SVNCommitInternalInconsistencyError(Exception):
2107 """Exception raised if we encounter an impossible state in the
2108 SVNCommit Databases."""
2109 pass
2111 def __init__(self, description="", revnum=None, cvs_revs=None):
2112 """Instantiate an SVNCommit. DESCRIPTION is for debugging only.
2113 If REVNUM, the SVNCommit will correspond to that revision number;
2114 and if CVS_REVS, then they must be the exact set of CVSRevisions for
2115 REVNUM.
2117 It is an error to pass CVS_REVS without REVNUM, but you may pass
2118 REVNUM without CVS_REVS, and then add a revision at a time by
2119 invoking add_revision()."""
2120 self._description = description
2122 # Revprop metadata for this commit.
2124 # These initial values are placeholders. At least the log and the
2125 # date should be different by the time these are used.
2127 # They are private because their values should be returned encoded
2128 # in UTF8, but callers aren't required to set them in UTF8.
2129 # Therefore, accessor methods are used to set them, and
2130 # self.get_revprops() is used to get them, in dictionary form.
2131 self._author = Ctx().username
2132 self._log_msg = "This log message means an SVNCommit was used too soon."
2133 self._max_date = 0 # Latest date seen so far.
2135 self.cvs_revs = cvs_revs or []
2136 if revnum:
2137 self.revnum = revnum
2138 else:
2139 self.revnum = SVNCommit.revnum
2140 SVNCommit.revnum = SVNCommit.revnum + 1
2142 # The symbolic name that is filled in this SVNCommit, if any
2143 self.symbolic_name = None
2145 # If this commit is a default branch synchronization, this
2146 # variable represents the subversion revision number of the
2147 # *primary* commit where the default branch changes actually
2148 # happened. It is None otherwise.
2150 # It is possible for multiple synchronization commits to refer to
2151 # the same motivating commit revision number, and it is possible
2152 # for a single synchronization commit to contain CVSRevisions on
2153 # multiple different default branches.
2154 self.motivating_revnum = None
2156 # is_tag is true only if this commit is a fill of a symbolic name
2157 # that is a tag, None in all other cases.
2158 self.is_tag = None
2160 def set_symbolic_name(self, name):
2161 "Set self.symbolic_name to NAME."
2162 name = _clean_symbolic_name(name)
2163 self.symbolic_name = name
2165 def set_motivating_revnum(self, revnum):
2166 "Set self.motivating_revnum to REVNUM."
2167 self.motivating_revnum = revnum
2169 def set_author(self, author):
2170 """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
2171 This is the only way to set an SVNCommit's author."""
2172 self._author = author
2174 def set_log_msg(self, msg):
2175 """Set this SVNCommit's log message to MSG (a locally-encoded string).
2176 This is the only way to set an SVNCommit's log message."""
2177 self._log_msg = msg
2179 def set_date(self, date):
2180 """Set this SVNCommit's date to DATE (an integer).
2181 Note that self.add_revision() updates this automatically based on
2182 a CVSRevision; so you may not need to call this at all, and even
2183 if you do, the value may be overwritten by a later call to
2184 self.add_revision()."""
2185 self._max_date = date
2187 def get_date(self):
2188 """Returns this SVNCommit's date as an integer."""
2189 return self._max_date
2191 def get_revprops(self):
2192 """Return the Subversion revprops for this SVNCommit."""
2193 date = format_date(self._max_date)
2194 try:
2195 ### FIXME: The 'replace' behavior should be an option, like
2196 ### --encoding is.
2197 utf8_author = None
2198 if self._author is not None:
2199 unicode_author = unicode(self._author, Ctx().encoding, 'replace')
2200 utf8_author = unicode_author.encode('utf8')
2201 unicode_log = unicode(self.get_log_msg(), Ctx().encoding, 'replace')
2202 utf8_log = unicode_log.encode('utf8')
2203 return { 'svn:author' : utf8_author,
2204 'svn:log' : utf8_log,
2205 'svn:date' : date }
2206 except UnicodeError:
2207 Log().write(LOG_WARN, '%s: problem encoding author or log message:'
2208 % warning_prefix)
2209 Log().write(LOG_WARN, " author: '%s'" % self._author)
2210 Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
2211 Log().write(LOG_WARN, " date: '%s'" % date)
2212 Log().write(LOG_WARN, "(subversion rev %s) Related files:" % self.revnum)
2213 for c_rev in self.cvs_revs:
2214 Log().write(LOG_WARN, " ", c_rev.fname)
2216 Log().write(LOG_WARN, "Consider rerunning with (for example)",
2217 "'--encoding=latin1'.\n")
2218 # It's better to fall back to the original (unknown encoding) data
2219 # than to either 1) quit or 2) record nothing at all.
2220 return { 'svn:author' : self._author,
2221 'svn:log' : self.get_log_msg(),
2222 'svn:date' : date }
2224 def add_revision(self, cvs_rev):
2225 self.cvs_revs.append(cvs_rev)
2226 if cvs_rev.timestamp > self._max_date:
2227 self._max_date = cvs_rev.timestamp
2229 def _is_primary_commit(self):
2230 """Return true if this is a primary SVNCommit, false otherwise."""
2231 return not (self.symbolic_name or self.motivating_revnum)
2233 def flush(self):
2234 Log().write(LOG_NORMAL, "Creating Subversion commit %d (%s)"
2235 % (self.revnum, self._description))
2236 Ctx()._persistence_manager.set_cvs_revs(self.revnum, self.cvs_revs)
2238 if self.motivating_revnum is not None:
2239 Ctx()._persistence_manager.set_motivating_revnum(self.revnum,
2240 self.motivating_revnum)
2242 # If we're not a primary commit, then store our date and/or our
2243 # symbolic_name
2244 if not self._is_primary_commit():
2245 Ctx()._persistence_manager.set_name_and_date(self.revnum,
2246 self.symbolic_name,
2247 self._max_date)
2249 def __str__(self):
2250 """ Print a human-readable description of this SVNCommit. This
2251 description is not intended to be machine-parseable (although
2252 we're not going to stop you if you try!)"""
2254 ret = "SVNCommit #: " + str(self.revnum) + "\n"
2255 if self.symbolic_name:
2256 ret = ret + " symbolic name: " + self.symbolic_name + "\n"
2257 else:
2258 ret = ret + " NO symbolic name\n"
2259 ret = ret + " debug description: " + self._description + "\n"
2260 ret = ret + " cvs_revs:\n"
2261 for c_rev in self.cvs_revs:
2262 ret = ret + " " + c_rev.unique_key() + "\n"
2263 return ret
2265 def get_log_msg(self):
2266 """Returns the actual log message for a primary commit, and the
2267 appropriate manufactured log message for a secondary commit."""
2268 if self.symbolic_name is not None:
2269 return self._log_msg_for_symbolic_name_commit()
2270 elif self.motivating_revnum is not None:
2271 return self._log_msg_for_default_branch_commit()
2272 else:
2273 return self._log_msg
2275 def _log_msg_for_symbolic_name_commit(self):
2276 """Creates a log message for a manufactured commit that fills
2277 self.symbolic_name. If self.is_tag is true, write the log message
2278 as though for a tag, else write it as though for a branch."""
2279 type = 'branch'
2280 if self.is_tag:
2281 type = 'tag'
2283 # In Python 2.2.3, we could use textwrap.fill(). Oh well :-).
2284 space_or_newline = ' '
2285 if len(self.symbolic_name) >= 13:
2286 space_or_newline = '\n'
2288 return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
2289 % (type, space_or_newline, self.symbolic_name)
2291 def _log_msg_for_default_branch_commit(self):
2292 """Creates a log message for a manufactured commit that
2293 synchronizes a non-trunk default branch with trunk."""
2294 msg = 'This commit was generated by cvs2svn to compensate for ' \
2295 'changes in r%d,\n' \
2296 'which included commits to RCS files with non-trunk default ' \
2297 'branches.\n' % self.motivating_revnum
2298 return msg
2300 class CVSRevisionAggregator:
2301 """This class groups CVSRevisions into CVSCommits that represent
2302 at least one SVNCommit."""
2303 def __init__(self):
2304 self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
2305 if not Ctx().trunk_only:
2306 self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), DB_OPEN_READ)
2307 self.cvs_commits = {}
2308 self.pending_symbols = {}
2309 # A list of symbols for which we've already encountered the last
2310 # CVSRevision that is a source for that symbol. That is, the
2311 # final fill for this symbol has been done, and we never need to
2312 # fill it again.
2313 self.done_symbols = [ ]
2315 # This variable holds the most recently created primary svn_commit
2316 # object. CVSRevisionAggregator maintains this variable merely
2317 # for its date, so that it can set dates for the SVNCommits
2318 # created in self.attempt_to_commit_symbols().
2319 self.latest_primary_svn_commit = None
2321 Ctx()._symbolings_logger = SymbolingsLogger()
2322 Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
2323 Ctx()._default_branches_db = Database(temp(DEFAULT_BRANCHES_DB),
2324 DB_OPEN_READ)
2327 def process_revision(self, c_rev):
2328 # Each time we read a new line, we scan the commits we've
2329 # accumulated so far to see if any are ready for processing now.
2330 ready_queue = [ ]
2331 for digest_key, cvs_commit in self.cvs_commits.items():
2332 if cvs_commit.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
2333 ready_queue.append(cvs_commit)
2334 del self.cvs_commits[digest_key]
2335 continue
2336 # If the inbound commit is on the same file as a pending commit,
2337 # close the pending commit to further changes. Don't flush it though,
2338 # as there may be other pending commits dated before this one.
2339 # ### ISSUE: the has_file() check below is not optimal.
2340 # It does fix the dataloss bug where revisions would get lost
2341 # if checked in too quickly, but it can also break apart the
2342 # commits. The correct fix would require tracking the dependencies
2343 # between change sets and committing them in proper order.
2344 if cvs_commit.has_file(c_rev.fname):
2345 unused_id = digest_key + '-'
2346 # Find a string that is not already a key in
2347 # the self.cvs_commits dict.
2348 while self.cvs_commits.has_key(unused_id):
2349 unused_id = unused_id + '-'
2350 self.cvs_commits[unused_id] = cvs_commit
2351 del self.cvs_commits[digest_key]
2353 # Add this item into the set of still-available commits.
2354 if self.cvs_commits.has_key(c_rev.digest):
2355 cvs_commit = self.cvs_commits[c_rev.digest]
2356 else:
2357 author, log = self.metadata_db[c_rev.digest]
2358 self.cvs_commits[c_rev.digest] = CVSCommit(c_rev.digest,
2359 author, log)
2360 cvs_commit = self.cvs_commits[c_rev.digest]
2361 cvs_commit.add_revision(c_rev)
2363 # If there are any elements in the ready_queue at this point, they
2364 # need to be processed, because this latest rev couldn't possibly
2365 # be part of any of them. Sort them into time-order, then process
2366 # 'em.
2367 ready_queue.sort()
2369 # Make sure we attempt_to_commit_symbols for this c_rev, even if no
2370 # commits are ready.
2371 if len(ready_queue) == 0:
2372 self.attempt_to_commit_symbols(ready_queue, c_rev)
2374 for cvs_commit in ready_queue[:]:
2375 self.latest_primary_svn_commit \
2376 = cvs_commit.process_revisions(self.done_symbols)
2377 ready_queue.remove(cvs_commit)
2378 self.attempt_to_commit_symbols(ready_queue, c_rev)
2380 def flush(self):
2381 """Commit anything left in self.cvs_commits. Then inform the
2382 SymbolingsLogger that all commits are done."""
2384 ready_queue = [ ]
2385 for k, v in self.cvs_commits.items():
2386 ready_queue.append((v, k))
2388 ready_queue.sort()
2389 for cvs_commit_tuple in ready_queue[:]:
2390 self.latest_primary_svn_commit = \
2391 cvs_commit_tuple[0].process_revisions(self.done_symbols)
2392 ready_queue.remove(cvs_commit_tuple)
2393 del self.cvs_commits[cvs_commit_tuple[1]]
2394 self.attempt_to_commit_symbols([])
2396 if not Ctx().trunk_only:
2397 Ctx()._symbolings_logger.close()
2399 def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
2401 """This function generates one SVNCommit for each symbol in
2402 self.pending_symbols that doesn't have an opening CVSRevision in
2403 either QUEUED_COMMITS or self.cvs_commits.values().
2405 If C_REV is not None, then we first add to self.pending_symbols
2406 any symbols from C_REV that C_REV is the last CVSRevision for."""
2408 # If we're not doing a trunk-only conversion, get the symbolic
2409 # names that this c_rev is the last *source* CVSRevision for and
2410 # add them to those left over from previous passes through the
2411 # aggregator.
2412 if c_rev and not Ctx().trunk_only:
2413 for sym in self.last_revs_db.get(c_rev.unique_key(), []):
2414 self.pending_symbols[sym] = None
2416 # Make a list of all symbols that still have *source* CVSRevisions
2417 # in the pending commit queue (self.cvs_commits).
2418 open_symbols = {}
2419 for sym in self.pending_symbols.keys():
2420 for cvs_commit in self.cvs_commits.values() + queued_commits:
2421 if cvs_commit.opens_symbolic_name(sym):
2422 open_symbols[sym] = None
2423 break
2425 # Sort the pending symbols so that we will always process the
2426 # symbols in the same order, regardless of the order in which the
2427 # dict hashing algorithm hands them back to us. We do this so
2428 # that our tests will get the same results on all platforms.
2429 sorted_pending_symbols_keys = self.pending_symbols.keys()
2430 sorted_pending_symbols_keys.sort()
2431 for sym in sorted_pending_symbols_keys:
2432 if open_symbols.has_key(sym): # sym is still open--don't close it.
2433 continue
2434 svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
2435 svn_commit.set_symbolic_name(sym)
2436 svn_commit.set_date(self.latest_primary_svn_commit.get_date())
2437 svn_commit.flush()
2438 self.done_symbols.append(sym)
2439 del self.pending_symbols[sym]
2442 class SymbolingsReader:
2443 """Provides an interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file
2444 and the SYMBOL_OFFSETS_DB. Does the heavy lifting of finding and
2445 returning the correct opening and closing Subversion revision
2446 numbers for a given symbolic name."""
2447 def __init__(self):
2448 """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and
2449 reads the offsets database into memory."""
2450 self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
2451 # The offsets_db is really small, and we need to read and write
2452 # from it a fair bit, so suck it into memory
2453 offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
2454 self.offsets = { }
2455 for key in offsets_db.db.keys():
2456 #print " ZOO:", key, offsets_db[key]
2457 self.offsets[key] = offsets_db[key]
2459 def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
2460 """Given SYMBOLIC_NAME and SVN_REVNUM, return a new
2461 SymbolicNameFillingGuide object.
2463 Note that if we encounter an opening rev in this fill, but the
2464 corresponding closing rev takes place later than SVN_REVNUM, the
2465 closing will not be passed to SymbolicNameFillingGuide in this
2466 fill (and will be discarded when encountered in a later fill).
2467 This is perfectly fine, because we can still do a valid fill
2468 without the closing--we always try to fill what we can as soon as
2469 we can."""
2470 # It's possible to have a branch start with a file that was added
2471 # on a branch.
2472 if not self.offsets.has_key(symbolic_name):
2473 return SymbolicNameFillingGuide(symbolic_name)
2474 # set our read offset for self.symbolings to the offset for
2475 # symbolic_name
2476 self.symbolings.seek(self.offsets[symbolic_name])
2478 symbol_fill = SymbolicNameFillingGuide(symbolic_name)
2479 while 1:
2480 fpos = self.symbolings.tell()
2481 line = self.symbolings.readline().rstrip()
2482 if not line:
2483 break
2484 name, revnum, type, svn_path = line.split(" ", 3)
2485 revnum = int(revnum)
2486 if (revnum > svn_revnum
2487 or name != symbolic_name):
2488 break
2489 symbol_fill.register(svn_path, revnum, type)
2491 # If we consumed anything from the file, update the stored offset
2492 # for this symbolic name to the beginning of the first line we did
2493 # not use, so that the next fill resumes reading from there.
2494 if not symbol_fill.is_empty():
2495 self.offsets[symbolic_name] = fpos
2497 symbol_fill.make_node_tree()
2498 return symbol_fill
2501 class SymbolicNameFillingGuide:
2502 """A SymbolicNameFillingGuide is essentially a node tree
2503 representing the source paths to be copied to fill
2504 self.symbolic_name in the current SVNCommit.
2506 After calling self.register() on a series of openings and closings,
2507 call self.make_node_tree() to prepare self.node_tree for
2508 examination. See the docstring for self.make_node_tree() for
2509 details on the structure of self.node_tree.
2511 By walking self.node_tree and calling self.get_best_revnum() on each
2512 node, the caller can determine what subversion revision number to
2513 copy the path corresponding to that node from. self.node_tree
2514 should be treated as read-only.
2516 The caller can then descend to sub-nodes to see if their "best
2517 revnum" differs from their parents' and if it does, take appropriate
2518 actions to "patch up" the subtrees."""
2519 def __init__(self, symbolic_name):
2520 """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
2521 prepares it for receiving openings and closings.
2523 Returns a fully functional and armed SymbolicNameFillingGuide
2524 object."""
2525 self.name = symbolic_name
2527 self.opening_key = "/o"
2528 self.closing_key = "/c"
2530 # A dictionary of SVN_PATHS and SVN_REVNUMS whose format is:
2532 # { svn_path : { self.opening_key : svn_revnum,
2533 # self.closing_key : svn_revnum }
2534 # ...}
2535 self.things = { }
2537 # The key for the root node of the node tree
2538 self.root_key = '0'
2539 # The dictionary that holds our node tree, seeded with the root key.
2540 self.node_tree = { self.root_key : { } }
2542 def get_best_revnum(self, node, preferred_revnum):
2543 """Determine the best subversion revision number to use when
2544 copying the source tree beginning at NODE. Returns a
2545 subversion revision number.
2547 PREFERRED_REVNUM is passed to self._best_rev and used to
2548 calculate the best_revnum."""
2549 revnum = SVN_INVALID_REVNUM
2551 # Aggregate openings and closings from the rev tree
2552 openings = self._list_revnums_for_key(node, self.opening_key)
2553 closings = self._list_revnums_for_key(node, self.closing_key)
2555 # Score the lists
2556 scores = self._score_revisions(self._sum_revnum_counts(openings),
2557 self._sum_revnum_counts(closings))
2559 revnum, max_score = self._best_rev(scores, preferred_revnum)
2561 if revnum == SVN_INVALID_REVNUM:
2562 sys.stderr.write(error_prefix + ": failed to find a revision "
2563 + "to copy from when copying %s\n" % name)
2564 sys.exit(1)
2565 return revnum, max_score
2568 def _best_rev(self, scores, preferred_rev):
2569 """Return the revision with the highest score from SCORES, a list
2570 returned by _score_revisions(). When the maximum score is shared
2571 by multiple revisions, the oldest revision is selected, unless
2572 PREFERRED_REV is one of the possibilities, in which case, it is
2573 selected."""
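# Illustrative example with made-up scores: given
#   scores = [(17, 1), (24, 2), (30, 1), (39, 0)]
# the maximum score 2 is first reached at revision 24, so with no
# preferred revision this returns (24, 2); with preferred_rev=28
# (which still scores 2, since 28 lies in the 24-29 range) it
# returns (28, 2) instead.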
2574 max_score = 0
2575 preferred_rev_score = -1
2576 rev = SVN_INVALID_REVNUM
2577 if preferred_rev is None:
2578 # Comparison order of different types is arbitrary. Do not
2579 # expect None to compare less than int values below.
2580 # In Python 2.3 None compares with ints like negative infinity.
2581 # In Python 2.0 None compares with ints like positive infinity.
2582 preferred_rev = SVN_INVALID_REVNUM
2583 for revnum, count in scores:
2584 if count > max_score:
2585 max_score = count
2586 rev = revnum
2587 if revnum <= preferred_rev:
2588 preferred_rev_score = count
2589 if preferred_rev_score == max_score:
2590 rev = preferred_rev
2591 return rev, max_score
2594 def _score_revisions(self, openings, closings):
2595 """Return a list of revisions and scores based on OPENINGS and
2596 CLOSINGS. The returned list looks like:
2598 [(REV1, SCORE1), (REV2, SCORE2), ...]
2600 where REV2 > REV1. OPENINGS and CLOSINGS are the values of
2601 self.opening__key and self.closing_key from some file or
2602 directory node, or else None.
2604 Each score indicates that copying the corresponding revision (or
2605 any following revision up to the next revision in the list) of the
2606 object in question would yield that many correct paths at or
2607 underneath the object. There may be other paths underneath it
2608 which are not correct and would need to be deleted or recopied;
2609 those can only be detected by descending and examining their
2610 scores.
2612 If OPENINGS is false, return the empty list."""
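# Worked example (numbers borrowed from the SymbolingsLogger
# docstring above, purely for illustration): with
#   openings = [(17, 1), (24, 1)] and closings = [(30, 1), (39, 1)]
# (as produced by _sum_revnum_counts), the result is
#   [(17, 1), (24, 2), (30, 1), (39, 0)]
# meaning a copy from r17-r23 yields one correct path, r24-r29 yields
# two (the ideal range), r30-r38 yields one, and r39 onward none.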
2613 # First look for easy outs.
2614 if not openings:
2615 return []
2617 # Must be able to call len(closings) below.
2618 if closings is None:
2619 closings = []
2621 # No easy out, so wish for lexical closures and calculate the scores :-).
2622 scores = []
2623 opening_score_accum = 0
2624 for i in range(len(openings)):
2625 opening_rev, opening_score = openings[i]
2626 opening_score_accum = opening_score_accum + opening_score
2627 scores.append((opening_rev, opening_score_accum))
2628 min = 0
2629 for i in range(len(closings)):
2630 closing_rev, closing_score = closings[i]
2631 done_exact_rev = None
2632 insert_index = None
2633 insert_score = None
2634 for j in range(min, len(scores)):
2635 score_rev, score = scores[j]
2636 if score_rev >= closing_rev:
2637 if not done_exact_rev:
2638 if score_rev > closing_rev:
2639 insert_index = j
2640 insert_score = scores[j-1][1] - closing_score
2641 done_exact_rev = 1
2642 scores[j] = (score_rev, score - closing_score)
2643 else:
2644 min = j + 1
2645 if not done_exact_rev:
2646 scores.append((closing_rev,scores[-1][1] - closing_score))
2647 if insert_index is not None:
2648 scores.insert(insert_index, (closing_rev, insert_score))
2649 return scores
2651 def _sum_revnum_counts(self, rev_list):
2652 """Takes an array of revisions (REV_LIST), for example:
2654 [21, 18, 6, 49, 39, 24, 24, 24, 24, 24, 24, 24]
2656 and adds up every occurrence of each revision and returns a sorted
2657 array of tuples containing (svn_revnum, count):
2659 [(6, 1), (18, 1), (21, 1), (24, 7), (39, 1), (49, 1)]"""
2661 s = {}
2662 for k in rev_list: # Add up the scores
2663 if s.has_key(k):
2664 s[k] = s[k] + 1
2665 else:
2666 s[k] = 1
2667 a = s.items()
2668 a.sort()
2669 return a
2671 def _list_revnums_for_key(self, node, revnum_type_key):
2672 """Scan self.node_tree and return a list of all the revision
2673 numbers (including duplicates) contained in REVNUM_TYPE_KEY values
2674 for all leaf nodes at and under NODE.
2676 REVNUM_TYPE_KEY should be either self.opening_key or
2677 self.closing_key."""
2678 revnums = []
2680 # If the node has self.opening_key, it must be a leaf node--all
2681 # leaf nodes have at least an opening key (although they may not
2682 # have a closing key). Fetch the revnum and return.
2683 if (self.node_tree[node].has_key(self.opening_key) and
2684 self.node_tree[node].has_key(revnum_type_key)):
2685 revnums.append(self.node_tree[node][revnum_type_key])
2686 return revnums
2688 for key, node_contents in self.node_tree[node].items():
2689 if key[0] == '/':
2690 continue
2691 revnums = revnums + \
2692 self._list_revnums_for_key(node_contents, revnum_type_key)
2693 return revnums
2695 def register(self, svn_path, svn_revnum, type):
2696 """Collects opening and closing revisions for this
2697 SymbolicNameFillingGuide. SVN_PATH is the source path that needs
2698 to be copied into self.symbolic_name, and SVN_REVNUM is either the
2699 first svn revision number that we can copy from (our opening), or
2700 the last (not inclusive) svn revision number that we can copy from
2701 (our closing). TYPE indicates whether this path is an opening or
2702 a closing.
2704 The opening for a given SVN_PATH must be passed before the closing
2705 for it to have any effect... any closing encountered before a
2706 corresponding opening will be discarded.
2708 It is not necessary to pass a corresponding closing for every
2709 opening."""
2711 # Always log an OPENING
2712 if type == OPENING:
2713 self.things[svn_path] = {self.opening_key: svn_revnum}
2714 # Only log a closing if we've already registered the opening for that path.
2715 elif type == CLOSING and self.things.has_key(svn_path):
2716 # When we have a non-trunk default branch, we may have multiple
2717 # closings--only register the first closing we encounter.
2718 if not self.things[svn_path].has_key(self.closing_key):
2719 self.things[svn_path][self.closing_key] = svn_revnum
2721 def make_node_tree(self):
2722 """Generates the SymbolicNameFillingGuide's node tree from
2723 self.things. Each leaf node maps self.opening_key to the earliest
2724 subversion revision from which this node/path may be copied; and
2725 optionally map self.closing_key to the subversion revision one
2726 higher than the last revision from which this node/path may be
2727 copied. Intermediate nodes never contain opening or closing
2728 flags."""
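# Illustrative sketch of the result (keys shown symbolically; real
# keys come from gen_key()): if self.things were
#   { 'trunk/proj/foo.c': { '/o': 17, '/c': 30 } }
# the node tree would end up roughly as
#   { '0': { 'trunk': K1 }, K1: { 'proj': K2 },
#     K2: { 'foo.c': K3 }, K3: { '/o': 17, '/c': 30 } }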
2730 for svn_path, open_close in self.things.items():
2731 parent_key = self.root_key
2733 path_so_far = ""
2734 # Walk down the path, one node at a time.
2735 components = svn_path.split('/')
2736 for component in components:
2737 path_so_far = path_so_far + '/' + component
2739 child_key = None
2740 if not self.node_tree[parent_key].has_key(component):
2741 child_key = gen_key()
2742 self.node_tree[child_key] = { }
2743 self.node_tree[parent_key][component] = child_key
2744 else:
2745 child_key = self.node_tree[parent_key][component]
2747 parent_key = child_key
2748 # Having reached the leaf, attach the value
2749 self.node_tree[parent_key] = open_close
2750 #print_node_tree(self.node_tree, self.root_key)
2752 def is_empty(self):
2753 """Return true if we haven't accumulated any openings or closings,
2754 false otherwise."""
2755 return not len(self.things)
2758 class FillSource:
2759 """Representation of a fill source used by the symbol filler in
2760 SVNRepositoryMirror."""
2761 def __init__(self, prefix, key):
2762 """Create an unscored fill source with a prefix and a key."""
2763 self.prefix = prefix
2764 self.key = key
2765 self.score = None
2766 self.revnum = None
2768 def set_score(self, score, revnum):
2769 """Set the SCORE and REVNUM."""
2770 self.score = score
2771 self.revnum = revnum
2773 def __cmp__(self, other):
2774 """Comparison operator used to sort FillSources in descending
2775 score order."""
2776 if self.score is None or other.score is None:
2777 raise TypeError, 'Tried to compare unscored FillSource'
2778 return cmp(other.score, self.score)
2781 class SVNRepositoryMirror:
2782 """Mirror a Subversion Repository as it is constructed, one
2783 SVNCommit at a time. The mirror is skeletal; it does not contain
2784 file contents. The creation of a dumpfile or Subversion repository
2785 is handled by delegates. See self.add_delegate method for how to
2786 set delegates.
2788 The structure of the repository is kept in two databases and one
2789 hash. The revs_db database maps revisions to root node keys, and
2790 the nodes_db database maps node keys to nodes. A node is a hash
2791 from directory names to keys. Both the revs_db and the nodes_db are
2792 stored on disk and each access is expensive.
2794 The nodes_db database only has the keys for old revisions. The
2795 revision that is being constructed is kept in memory in the new_nodes
2796 hash which is cheap to access.
2798 You must invoke _start_commit between SVNCommits.
2800 *** WARNING *** All path arguments to methods in this class CANNOT
2801 have leading or trailing slashes."""
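# Illustrative sketch (node keys shown symbolically; real keys come
# from gen_key()): if revision 5 contains only the directory
# trunk/proj, revs_db might map '5' -> R5 while nodes_db contains
#   R5 -> { 'trunk': T5 }
#   T5 -> { 'proj': P5 }
#   P5 -> { }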
2804 class SVNRepositoryMirrorPathExistsError(Exception):
2805 """Exception raised if an attempt is made to add a path to the
2806 repository mirror and that path already exists in the youngest
2807 revision of the repository."""
2808 pass
2810 class SVNRepositoryMirrorUnexpectedOperationError(Exception):
2811 """Exception raised if a CVSRevision is found to have an unexpected
2812 operation (OP) value."""
2813 pass
2815 class SVNRepositoryMirrorInvalidFillOperationError(Exception):
2816 """Exception raised if an empty SymbolicNameFillingGuide is returned
2817 during a fill where the branch in question already exists."""
2818 pass
2820 def __init__(self):
2821 """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
2822 self.delegates = [ ]
2824 # This corresponds to the 'revisions' table in a Subversion fs.
2825 self.revs_db = Database(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
2826 Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)
2828 # This corresponds to the 'nodes' table in a Subversion fs. (We
2829 # don't need a 'representations' or 'strings' table because we
2830 # only track metadata, not file contents.)
2831 self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
2832 Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)
2834 # Start at revision 0 without a root node. It will be created
2835 # by _open_writable_root_node.
2836 self.youngest = 0
2837 self.new_root_key = None
2838 self.new_nodes = { }
2840 if not Ctx().trunk_only:
2841 ###PERF IMPT: Suck this into memory.
2842 self.tags_db = TagsDatabase(DB_OPEN_READ)
2843 self.symbolings_reader = SymbolingsReader()
2845 def _initialize_repository(self, date):
2846 """Initialize the repository by creating the directories for
2847 trunk, tags, and branches. This method should only be called
2848 after all delegates are added to the repository mirror."""
2849 # Make a 'fake' SVNCommit so we can take advantage of the revprops
2850 # magic therein
2851 svn_commit = SVNCommit("Initialization", 1)
2852 svn_commit.set_date(date)
2853 svn_commit.set_log_msg("New repository initialized by cvs2svn.")
2855 self._start_commit(svn_commit)
2856 self._mkdir(Ctx().trunk_base)
2857 if not Ctx().trunk_only:
2858 self._mkdir(Ctx().branches_base)
2859 self._mkdir(Ctx().tags_base)
2861 def _start_commit(self, svn_commit):
2862 """Start a new commit."""
2863 if self.youngest > 0:
2864 self._end_commit()
2866 self.youngest = svn_commit.revnum
2867 self.new_root_key = None
2868 self.new_nodes = { }
2870 self._invoke_delegates('start_commit', svn_commit)
2872 def _end_commit(self):
2873 """Called at the end of each commit. This method copies the newly
2874 created nodes to the on-disk nodes db."""
2875 if self.new_root_key is None:
2876 # No changes were made in this revision, so we make the root node
2877 # of the new revision be the same as the last one.
2878 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
2879 else:
2880 self.revs_db[str(self.youngest)] = self.new_root_key
2881 # Copy the new nodes to the nodes_db
2882 for key, value in self.new_nodes.items():
2883 self.nodes_db[key] = value
2885 def _get_node(self, key):
2886 """Returns the node contents for KEY which may refer to either
2887 self.nodes_db or self.new_nodes."""
2888 if self.new_nodes.has_key(key):
2889 return self.new_nodes[key]
2890 else:
2891 return self.nodes_db[key]
2893 def _open_readonly_node(self, path, revnum):
2894 """Open a readonly node for PATH at revision REVNUM. Returns the
2895 node key and node contents if the path exists, else (None, None)."""
2896 # Get the root key
2897 if revnum == self.youngest:
2898 if self.new_root_key is None:
2899 node_key = self.revs_db[str(self.youngest - 1)]
2900 else:
2901 node_key = self.new_root_key
2902 else:
2903 node_key = self.revs_db[str(revnum)]
2905 for component in path.split('/'):
2906 node_contents = self._get_node(node_key)
2907 if not node_contents.has_key(component):
2908 return None
2909 node_key = node_contents[component]
2911 return node_key
2913 def _open_writable_root_node(self):
2914 """Open a writable root node. The current root node is returned
2915 immediately if it is already writable. If not, create a new one by
2916 copying the contents of the root node of the previous version."""
2917 if self.new_root_key is not None:
2918 return self.new_root_key, self.new_nodes[self.new_root_key]
2920 if self.youngest < 2:
2921 new_contents = { }
2922 else:
2923 new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
2924 self.new_root_key = gen_key()
2925 self.new_nodes = { self.new_root_key: new_contents }
2927 return self.new_root_key, new_contents
2929 def _open_writable_node(self, svn_path, create):
2930 """Open a writable node for the path SVN_PATH, creating SVN_PATH
2931 and any missing directories if CREATE is True."""
2932 parent_key, parent_contents = self._open_writable_root_node()
2934 # Walk down the path, one node at a time.
2935 path_so_far = None
2936 components = svn_path.split('/')
2937 for i in range(len(components)):
2938 component = components[i]
2939 this_key = this_contents = None
2940 path_so_far = _path_join(path_so_far, component)
2941 if parent_contents.has_key(component):
2942 # The component exists.
2943 this_key = parent_contents[component]
2944 if self.new_nodes.has_key(this_key):
2945 this_contents = self.new_nodes[this_key]
2946 else:
2947 # Suck the node from the nodes_db, but update the key
2948 this_contents = self.nodes_db[this_key]
2949 this_key = gen_key()
2950 self.new_nodes[this_key] = this_contents
2951 parent_contents[component] = this_key
2952 elif create:
2953 # The component does not exist, so we create it.
2954 this_contents = { }
2955 this_key = gen_key()
2956 self.new_nodes[this_key] = this_contents
2957 parent_contents[component] = this_key
2958 if i < len(components) - 1:
2959 self._invoke_delegates('mkdir', path_so_far)
2960 else:
2961 # The component does not exist and we are not instructed to
2962 # create it, so we give up.
2963 return None, None
2965 parent_key = this_key
2966 parent_contents = this_contents
2968 return this_key, this_contents
2970 def _path_exists(self, path):
2971 """If PATH exists in self.youngest of the svn repository mirror,
2972 return True, else return False.
2974 PATH must not start with '/'."""
2975 return self._open_readonly_node(path, self.youngest) is not None
2977 def _fast_delete_path(self, parent_path, parent_contents, component):
2978 """Delete COMPONENT from the parent direcory PARENT_PATH with the
2979 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
2980 in PARENT_CONTENTS."""
2981 if parent_contents.has_key(component):
2982 del parent_contents[component]
2983 self._invoke_delegates('delete_path', _path_join(parent_path, component))
2985 def _delete_path(self, svn_path, should_prune=False):
2986 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
2987 all ancestor directories that are made empty when SVN_PATH is deleted.
2988 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
2990 NOTE: This function does *not* allow you to delete top-level entries
2991 (like /trunk, /branches, /tags), nor does it prune upwards beyond
2992 those entries."""
2993 pos = svn_path.rfind('/')
2994 parent_path = svn_path[:pos]
2995 entry = svn_path[pos+1:]
2996 parent_key, parent_contents = self._open_writable_node(parent_path, False)
2997 if parent_key is not None:
2998 self._fast_delete_path(parent_path, parent_contents, entry)
2999 # The following recursion makes pruning an O(n^2) operation in the
3000 # worst case (where n is the depth of SVN_PATH), but the worst case
3001 # is probably rare, and the constant cost is pretty low. Another
3002 # drawback is that we issue a delete for each path and not just
3003 # a single delete for the topmost directory pruned.
3004 if (should_prune and len(parent_contents) == 0 and
3005 parent_path.find('/') != -1):
3006 self._delete_path(parent_path, True)
3008 def _mkdir(self, path):
3009 """Create PATH in the repository mirror at the youngest revision."""
3010 self._open_writable_node(path, True)
3011 self._invoke_delegates('mkdir', path)
3013 def _change_path(self, cvs_rev):
3014 """Register a change in self.youngest for the CVS_REV's svn_path
3015 in the repository mirror."""
3016 # We do not have to update the nodes because our mirror is only
3017 # concerned with the presence or absence of paths, and a file
3018 # content change does not cause any path changes.
3019 self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, 0))
3021 def _add_path(self, cvs_rev):
3022 """Add the CVS_REV's svn_path to the repository mirror."""
3023 self._open_writable_node(cvs_rev.svn_path, True)
3024 self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, 1))
3026 def _copy_path(self, src_path, dest_path, src_revnum):
3027 """Copy SRC_PATH at subversion revision number SRC_REVNUM to
3028 DEST_PATH. In the youngest revision of the repository, DEST_PATH's
3029 parent *must* exist, but DEST_PATH *cannot* exist.
3031 Return the node key and the contents of the new node at DEST_PATH
3032 as a dictionary."""
3033 # get the contents of the node of our src_path
3034 src_key = self._open_readonly_node(src_path, src_revnum)
3035 src_contents = self._get_node(src_key)
3037 # Get the parent path and the base path of the dest_path
3038 pos = dest_path.rindex('/')
3039 dest_parent = dest_path[:pos]
3040 dest_basename = dest_path[pos+1:]
3041 dest_parent_key, dest_parent_contents = \
3042 self._open_writable_node(dest_parent, False)
3044 if dest_parent_contents.has_key(dest_basename):
3045 msg = "Attempt to add path '%s' to repository mirror " % dest_path
3046 msg = msg + "when it already exists in the mirror."
3047 raise self.SVNRepositoryMirrorPathExistsError, msg
3049 dest_parent_contents[dest_basename] = src_key
3050 self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)
3052 # Yes sir, src_key and src_contents are also the contents of the
3053 # destination. This is a cheap copy, remember! :-)
3054 return src_key, src_contents
3056 def _fill_symbolic_name(self, svn_commit):
3057 """Performs all copies necessary to create as much of the the tag
3058 or branch SVN_COMMIT.symbolic_name as possible given the current
3059 revision of the repository mirror.
3061 The symbolic name is guaranteed to exist in the Subversion
3062 repository by the end of this call, even if there are no paths
3063 under it."""
3064 symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
3065 svn_commit.symbolic_name, self.youngest)
3067 # Create the list of sources for the symbolic name. All source
3068 # prefixes must be direct sources for the destination, i.e. we
3069 # must have 'trunk' and 'branches/my_branch' and not just
3070 # 'branches'.
3071 sources = []
3072 for entry, key in symbol_fill.node_tree[symbol_fill.root_key].items():
3073 if entry == Ctx().trunk_base:
3074 sources.append(FillSource(entry, key))
3075 elif entry == Ctx().branches_base:
3076 for entry2, key2 in symbol_fill.node_tree[key].items():
3077 sources.append(FillSource(entry + '/' + entry2, key2))
3078 else:
3079 raise # Should never happen
3080 if self.tags_db.has_key(svn_commit.symbolic_name):
3081 dest_prefix = _path_join(Ctx().tags_base, svn_commit.symbolic_name)
3082 else:
3083 dest_prefix = _path_join(Ctx().branches_base,
3084 svn_commit.symbolic_name)
3086 if sources:
3087 dest_key = self._open_writable_node(dest_prefix, False)[0]
3088 self._fill(symbol_fill, dest_prefix, dest_key, sources)
3089 else:
3090 # We can only get here for a branch whose first commit is an add
3091 # (as opposed to a copy).
3092 dest_path = Ctx().branches_base + '/' + symbol_fill.name
3093 if not self._path_exists(dest_path):
3094 # If our symbol_fill was empty, that means that our first
3095 # commit on the branch was to a file added on the branch, and
3096 # that this is our first fill of that branch.
3098 # This case is covered by test 16.
3100 # ...we create the branch by copying trunk from our
3101 # current revision number minus 1
3102 source_path = Ctx().trunk_base
3103 entries = self._copy_path(source_path, dest_path,
3104 svn_commit.revnum - 1)[1]
3105 # Now since we've just copied trunk to a branch that's
3106 # *supposed* to be empty, we delete any entries in the
3107 # copied directory.
3108 for entry in entries.keys():
3109 del_path = dest_path + '/' + entry
3110 # Delete but don't prune.
3111 self._delete_path(del_path)
3112 else:
3113 msg = "Error filling branch '" + symbol_fill.name + "'.\n"
3114 msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
3115 msg = msg + "attempted to create a branch that already exists."
3116 raise self.SVNRepositoryMirrorInvalidFillOperationError, msg
3118 def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
3119 path = None, parent_source_prefix = None,
3120 preferred_revnum = None, prune_ok = None):
3121 """Fill the tag or branch at DEST_PREFIX + PATH with items from
3122 SOURCES, and recurse into the child items.
3124 DEST_PREFIX is the prefix of the destination directory, e.g.
3125 '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
3126 FillSource classes that are candidates to be copied to the
3127 destination. DEST_KEY is the key in self.nodes_db to the
3128 destination, or None if the destination does not yet exist.
3130 PATH is the path relative to DEST_PREFIX. If PATH is None, we
3131 are at the top level, e.g. '/tags/my_tag'.
3133 PARENT_SOURCE_PREFIX is the source prefix that was used to copy
3134 the parent directory, and PREFERRED_REVNUM is an int which is the
3135 source revision number that the caller (who may have copied KEY's
3136 parent) used to perform its copy. If PREFERRED_REVNUM is None,
3137 then no revision is preferable to any other (which probably means
3138 that no copies have happened yet).
3140 PRUNE_OK means that a copy has been made in this recursion, and
3141 it's safe to prune directories that are not in
3142 SYMBOL_FILL.node_tree, provided that said directory has
3143 PARENT_SOURCE_PREFIX as its source prefix.
3145 PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
3146 should only be passed in by recursive calls."""
3147 # Calculate scores and revnums for all sources
3148 for source in sources:
3149 src_revnum, score = symbol_fill.get_best_revnum(source.key,
3150 preferred_revnum)
3151 source.set_score(score, src_revnum)
3153 # Sort the sources in descending score order so that we will make
3154 # an eventual copy from the source with the highest score.
3155 sources.sort()
3156 copy_source = sources[0]
3158 src_path = _path_join(copy_source.prefix, path)
3159 dest_path = _path_join(dest_prefix, path)
3161 # Figure out if we shall copy to this destination and delete any
3162 # destination path that is in the way.
3163 do_copy = 0
3164 if dest_key is None:
3165 do_copy = 1
3166 elif prune_ok and (parent_source_prefix != copy_source.prefix or
3167 copy_source.revnum != preferred_revnum):
3168 # We are about to replace the destination, so we need to remove
3169 # it before we perform the copy.
3170 self._delete_path(dest_path)
3171 do_copy = 1
3173 if do_copy:
3174 dest_key, dest_entries = self._copy_path(src_path, dest_path,
3175 copy_source.revnum)
3176 prune_ok = 1
3177 else:
3178 dest_entries = self._get_node(dest_key)
3180 # Create the SRC_ENTRIES hash from SOURCES. The keys are path
3181 # elements and the values are lists of FillSource classes where
3182 # this path element exists.
3183 src_entries = {}
3184 for source in sources:
3185 for entry, key in symbol_fill.node_tree[source.key].items():
3186 if entry[0] == '/': # Skip flags
3187 continue
3188 if not src_entries.has_key(entry):
3189 src_entries[entry] = []
3190 src_entries[entry].append(FillSource(source.prefix, key))
3192 if prune_ok:
3193 # Delete the entries in DEST_ENTRIES that are not in src_entries.
3194 delete_list = [ ]
3195 for entry in dest_entries.keys():
3196 if not src_entries.has_key(entry):
3197 delete_list.append(entry)
3198 if delete_list:
3199 if not self.new_nodes.has_key(dest_key):
3200 dest_key, dest_entries = self._open_writable_node(dest_path, True)
3201 # Sort the delete list to get "diffable" dumpfiles.
3202 delete_list.sort()
3203 for entry in delete_list:
3204 self._fast_delete_path(dest_path, dest_entries, entry)
3206 # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
3207 src_keys = src_entries.keys()
3208 src_keys.sort()
3209 for src_key in src_keys:
3210 if dest_entries.has_key(src_key):
3211 next_dest_key = dest_entries[src_key]
3212 else:
3213 next_dest_key = None
3214 self._fill(symbol_fill, dest_prefix, next_dest_key,
3215 src_entries[src_key], _path_join(path, src_key),
3216 copy_source.prefix, sources[0].revnum, prune_ok)
3218 def _synchronize_default_branch(self, svn_commit):
3219 """Propagate any changes that happened on a non-trunk default
3220 branch to the trunk of the repository. See
3221 CVSCommit._post_commit() for details on why this is necessary."""
3222 for cvs_rev in svn_commit.cvs_revs:
3223 if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
3224 if self._path_exists(cvs_rev.svn_trunk_path):
3225 # Delete the path on trunk...
3226 self._delete_path(cvs_rev.svn_trunk_path)
3227 # ...and copy over from branch
3228 self._copy_path(cvs_rev.svn_path, cvs_rev.svn_trunk_path,
3229 svn_commit.motivating_revnum)
3230 elif cvs_rev.op == OP_DELETE:
3231 # delete trunk path
3232 self._delete_path(cvs_rev.svn_trunk_path)
3233 else:
3234 msg = ("Unknown CVSRevision operation '%s' in default branch sync."
3235 % cvs_rev.op)
3236 raise self.SVNRepositoryMirrorUnexpectedOperationError, msg
3238 def commit(self, svn_commit):
3239 """Add an SVNCommit to the SVNRepository, incrementing the
3240 Repository revision number, and changing the repository. Invoke
3241 the delegates' start_commit() methods."""
3243 if svn_commit.revnum == 2:
3244 self._initialize_repository(svn_commit.get_date())
3246 self._start_commit(svn_commit)
3248 if svn_commit.symbolic_name:
3249 Log().write(LOG_VERBOSE, "Filling symbolic name:",
3250 svn_commit.symbolic_name)
3251 self._fill_symbolic_name(svn_commit)
3252 elif svn_commit.motivating_revnum:
3253 Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
3254 % svn_commit.motivating_revnum)
3255 self._synchronize_default_branch(svn_commit)
3256 else: # This actually commits CVSRevisions
3257 if len(svn_commit.cvs_revs) > 1: plural = "s"
3258 else: plural = ""
3259 Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
3260 % (len(svn_commit.cvs_revs), plural))
3261 for cvs_rev in svn_commit.cvs_revs:
3262 # See comment in CVSCommit._commit() for what this is all
3263 # about. Note that although asking self._path_exists() is
3264 # somewhat expensive, we only do it if the first two (cheap)
3265 # tests succeed first.
3266 if not ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
3267 and (cvs_rev.rev == "1.1.1.1")
3268 and self._path_exists(cvs_rev.svn_path)):
3269 if cvs_rev.op == OP_ADD:
3270 self._add_path(cvs_rev)
3271 elif cvs_rev.op == OP_CHANGE:
3272 # Fix for Issue #74:
3274 # Here's the scenario. You have file FOO that is imported
3275 # on a non-trunk vendor branch. So in r1.1 and r1.1.1.1,
3276 # the file exists.
3278 # Moving forward in time, FOO is deleted on the default
3279 # branch (r1.1.1.2). cvs2svn determines that this delete
3280 # also needs to happen on trunk, so FOO is deleted on
3281 # trunk.
3283 # Along comes r1.2, whose op is OP_CHANGE (because r1.1 is
3284 # not 'dead', we assume it's a change). However, since
3285 # our trunk file has been deleted, svnadmin blows up--you
3286 # can't change a file that doesn't exist!
3288 # Soooo... we just check the path, and if it doesn't
3289 # exist, we do an add... if the path does exist, it's
3290 # business as usual.
3291 if not self._path_exists(cvs_rev.svn_path):
3292 self._add_path(cvs_rev)
3293 else:
3294 self._change_path(cvs_rev)
3296 if cvs_rev.op == OP_DELETE:
3297 self._delete_path(cvs_rev.svn_path, Ctx().prune)
3299 def cleanup(self):
3300 """Callback for the Cleanup.register in self.__init__."""
3301 self.revs_db = None
3302 self.nodes_db = None
3304 def add_delegate(self, delegate):
3305 """Adds DELEGATE to self.delegates.
3307 For every delegate you add, as soon as SVNRepositoryMirror
3308 performs a repository action method, SVNRepositoryMirror will call
3309 the delegate's corresponding repository action method. Multiple
3310 delegates will be called in the order that they are added. See
3311 SVNRepositoryMirrorDelegate for more information."""
3312 self.delegates.append(delegate)
3314 def _invoke_delegates(self, method, *args):
3315 """Iterate through each of our delegates, in the order that they
3316 were added, and call the delegate's method named METHOD with the
3317 arguments in ARGS."""
3318 for delegate in self.delegates:
3319 getattr(delegate, method)(*args)
3321 def finish(self):
3322 """Calls the delegate finish method."""
3323 self._end_commit()
3324 self._invoke_delegates('finish')
3325 self.cleanup()
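
# (Editor's note: a minimal, hypothetical sketch -- not part of cvs2svn --
# of the key/node scheme described in SVNRepositoryMirror's docstring.
# revs_db maps a revision number to the key of that revision's root node,
# and nodes_db maps node keys to hashes of entry name -> child node key.
# For simplicity this sketch ignores the in-memory new_nodes cache that
# the real class consults for the youngest revision.)
def _example_mirror_lookup(revs_db, nodes_db, revnum, path):
  """Walk PATH (e.g. 'trunk/src/foo.c') down from the root node of
  REVNUM and return the key of the last component, or None if any
  component is missing."""
  key = revs_db[str(revnum)]
  for component in path.split('/'):
    node = nodes_db[key]          # hash: entry name -> child node key
    if not node.has_key(component):
      return None
    key = node[component]
  return key
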
3328 class SVNCommitItem:
3329 """A wrapper class for CVSRevision objects upon which
3330 Subversion-related data (such as properties) may be hung."""
3332 def __init__(self, c_rev, make_svn_props):
3333 self.c_rev = c_rev
3334 self.set_cvs_revnum_properties = Ctx().cvs_revnums
3335 self.eol_from_mime_type = Ctx().eol_from_mime_type
3336 self.no_default_eol = Ctx().no_default_eol
3337 self.keywords_off = Ctx().keywords_off
3338 self.mime_mapper = Ctx().mime_mapper
3340 # We begin with only a "CVS revision" property.
3341 self.svn_props = { }
3342 if self.set_cvs_revnum_properties:
3343 self.svn_props['cvs2svn:cvs-rev'] = c_rev.rev
3344 make_svn_props = True
3346 # Set mime-type and eol. These two properties are intertwingled;
3347 # follow the conditionals carefully. See also issue #39.
3348 mime_type = None
3349 eol_style = None
3350 keywords = None
3352 if self.mime_mapper:
3353 mime_type = self.mime_mapper.get_type_from_filename(c_rev.cvs_path)
3355 if not c_rev.mode == 'b':
3356 if not self.no_default_eol:
3357 eol_style = 'native'
3358 elif mime_type and self.eol_from_mime_type:
3359 if mime_type.startswith("text/"):
3360 eol_style = 'native'
3361 else:
3362 eol_style = None
3363 elif mime_type is None:
3364 # file is kb, and no other mimetype specified
3365 mime_type = 'application/octet-stream'
3367 # Set the svn:keywords property, if appropriate. See issue #2.
3368 if not self.keywords_off and (c_rev.mode is None or c_rev.mode == 'kv' or
3369 c_rev.mode == 'kvl'):
3370 keywords = 'Author Date Id Revision'
3372 # Remember if we need to filter the EOLs. We can't use self.svn_props
3373 # because they are only set on the first revision and we need to filter
3374 # all revisions.
3375 self.needs_eol_filter = eol_style == 'native'
3377 # Remember if this file has svn:keywords set
3378 self.has_keywords = keywords is not None
3380 # If asked to fill in the Subversion properties ('svn:' ones), do so.
3381 if make_svn_props:
3382 # Tack on the executableness, if any.
3383 if c_rev.file_executable:
3384 self.svn_props['svn:executable'] = '*'
3386 # Set the svn:keywords property, if appropriate. See issue #2.
3387 if keywords:
3388 self.svn_props['svn:keywords'] = 'Author Date Id Revision'
3390 if mime_type:
3391 self.svn_props['svn:mime-type'] = mime_type
3393 if eol_style:
3394 self.svn_props['svn:eol-style'] = eol_style
3397 class SVNRepositoryMirrorDelegate:
3398 """Abstract superclass for any delegate to SVNRepositoryMirror.
3399 Subclasses must implement all of the methods below.
3401 For each method, a subclass implements, in its own way, the
3402 Subversion operation implied by the method's name. For example, for
3403 the add_path method, the DumpfileDelegate would write out a
3404 "Node-add:" command to a Subversion dumpfile, the StdoutDelegate
3405 would merely print that the path is being added to the repository,
3406 and the RepositoryDelegate would actually cause the path to be added
3407 to the Subversion repository that it is creating."""
3410 def start_commit(self, svn_commit):
3411 """Perform any actions needed to start SVNCommit SVN_COMMIT;
3412 see subclass implementation for details."""
3413 raise NotImplementedError
3415 def mkdir(self, path):
3416 """PATH is a string; see subclass implementation for details."""
3417 raise NotImplementedError
3419 def add_path(self, s_item):
3420 """S_ITEM is an SVNCommitItem; see subclass implementation for
3421 details."""
3422 raise NotImplementedError
3424 def change_path(self, s_item):
3425 """S_ITEM is an SVNCommitItem; see subclass implementation for
3426 details."""
3427 raise NotImplementedError
3429 def delete_path(self, path):
3430 """PATH is a string; see subclass implementation for
3431 details."""
3432 raise NotImplementedError
3434 def copy_path(self, src_path, dest_path, src_revnum):
3435 """SRC_PATH and DEST_PATH are both strings, and SRC_REVNUM is a
3436 subversion revision number (int); see subclass implementation for
3437 details."""
3438 raise NotImplementedError
3440 def finish(self):
3441 """Perform any cleanup necessary after all revisions have been
3442 committed."""
3443 raise NotImplementedError
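
# (Editor's note: a hypothetical example, not part of cvs2svn, showing the
# shape of the SVNRepositoryMirrorDelegate interface described above.  This
# delegate merely counts the repository actions it is told about; it could
# be registered with SVNRepositoryMirror.add_delegate() like any other
# delegate.)
class _ExampleCountingDelegate(SVNRepositoryMirrorDelegate):
  def __init__(self):
    self.counts = { }

  def _bump(self, action):
    self.counts[action] = self.counts.get(action, 0) + 1

  def start_commit(self, svn_commit):
    self._bump('commits')

  def mkdir(self, path):
    self._bump('mkdirs')

  def add_path(self, s_item):
    self._bump('adds')

  def change_path(self, s_item):
    self._bump('changes')

  def delete_path(self, path):
    self._bump('deletes')

  def copy_path(self, src_path, dest_path, src_revnum):
    self._bump('copies')

  def finish(self):
    pass
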
3446 class DumpfileDelegate(SVNRepositoryMirrorDelegate):
3447 """Create a Subversion dumpfile."""
3449 def __init__(self, dumpfile_path=None):
3450 """Return a new DumpfileDelegate instance, attached to a dumpfile
3451 DUMPFILE_PATH (Ctx().dumpfile, if None), using Ctx().encoding.
3453 If Ctx().cvs_revnums is true, then set the 'cvs2svn:cvs-rev'
3454 property on files, when they are changed due to a corresponding
3455 CVS revision.
3457 If Ctx().mime_mapper is not None, then it is a MimeMapper
3458 instance, used to determine whether or not to set the
3459 'svn:mime-type' property on files. But even if Ctx().mime_mapper
3460 is None, files marked with the CVS 'kb' flag will receive a mime
3461 type of "application/octet-stream".
3463 Unless Ctx().no_default_eol is true, set 'svn:eol-style' to
3464 'native' for files not marked with the CVS 'kb' flag, except as
3465 superseded by Ctx().eol_from_mime_type (see below).
3467 If Ctx().eol_from_mime_type is not None, then set 'svn:eol-style'
3468 to 'native' for all files to which Ctx().mime_mapper assigns a
3469 mime type beginning with "text/", and don't set 'svn:eol-style'
3470 for files assigned a type not beginning with "text/".
3471 """
3472 if dumpfile_path:
3473 self.dumpfile_path = dumpfile_path
3474 else:
3475 self.dumpfile_path = Ctx().dumpfile
3476 self.path_encoding = Ctx().encoding
3478 self.dumpfile = open(self.dumpfile_path, 'wb')
3479 self._write_dumpfile_header(self.dumpfile)
3481 def _write_dumpfile_header(self, dumpfile):
3482 # Initialize the dumpfile with the standard headers.
3484 # Since the CVS repository doesn't have a UUID, and the Subversion
3485 # repository will be created with one anyway, we don't specify a
3486 # UUID in the dumpfile.
3487 dumpfile.write('SVN-fs-dump-format-version: 2\n\n')
3489 def _utf8_path(self, path):
3490 """Return a copy of PATH encoded in UTF-8. PATH is assumed to be
3491 encoded in self.path_encoding."""
3492 try:
3493 # Log messages can be converted with the 'replace' strategy,
3494 # but we can't afford any lossiness here.
3495 unicode_path = unicode(path, self.path_encoding, 'strict')
3496 return unicode_path.encode('utf-8')
3497 except UnicodeError:
3498 print "Unable to convert a path '%s' to internal encoding." % path
3499 print "Consider rerunning with (for example) '--encoding=latin1'"
3500 sys.exit(1)
3502 def start_commit(self, svn_commit):
3503 """Emit the start of SVN_COMMIT (an SVNCommit)."""
3505 self.revision = svn_commit.revnum
3507 # The start of a new commit typically looks like this:
3509 # Revision-number: 1
3510 # Prop-content-length: 129
3511 # Content-length: 129
3513 # K 7
3514 # svn:log
3515 # V 27
3516 # Log message for revision 1.
3517 # K 10
3518 # svn:author
3519 # V 7
3520 # jrandom
3521 # K 8
3522 # svn:date
3523 # V 27
3524 # 2003-04-22T22:57:58.132837Z
3525 # PROPS-END
3527 # Notice that the length headers count everything -- not just the
3528 # length of the data but also the lengths of the lengths, including
3529 # the 'K ' or 'V ' prefixes.
3531 # The reason there are both Prop-content-length and Content-length
3532 # is that the former includes just props, while the latter includes
3533 # everything. That's the generic header form for any entity in a
3534 # dumpfile. But since revisions only have props, the two lengths
3535 # are always the same for revisions.
3537 # Calculate the total length of the props section.
3538 props = svn_commit.get_revprops()
3539 prop_names = props.keys()
3540 prop_names.sort()
3541 total_len = 10 # len('PROPS-END\n')
3542 for propname in prop_names:
3543 if props[propname] is None:
3544 continue
3545 klen = len(propname)
3546 klen_len = len('K %d' % klen)
3547 vlen = len(props[propname])
3548 vlen_len = len('V %d' % vlen)
3549 # + 4 for the four newlines within a given property's section
3550 total_len = total_len + klen + klen_len + vlen + vlen_len + 4
3552 # Print the revision header and props
3553 self.dumpfile.write('Revision-number: %d\n'
3554 'Prop-content-length: %d\n'
3555 'Content-length: %d\n'
3556 '\n'
3557 % (self.revision, total_len, total_len))
3559 for propname in prop_names:
3560 if props[propname] is None:
3561 continue
3562 self.dumpfile.write('K %d\n'
3563 '%s\n'
3564 'V %d\n'
3565 '%s\n' % (len(propname),
3566 propname,
3567 len(props[propname]),
3568 props[propname]))
3570 self.dumpfile.write('PROPS-END\n')
3571 self.dumpfile.write('\n')
3573 def mkdir(self, path):
3574 """Emit the creation of directory PATH."""
3575 self.dumpfile.write("Node-path: %s\n"
3576 "Node-kind: dir\n"
3577 "Node-action: add\n"
3578 "Content-length: 10\n"
3579 "\n"
3580 "\n" % self._utf8_path(path))
3582 def _add_or_change_path(self, s_item, op):
3583 """Emit the addition or change corresponding to S_ITEM.
3584 OP is either the constant OP_ADD or OP_CHANGE."""
3586 # Validation stuffs
3587 if op == OP_ADD:
3588 action = 'add'
3589 elif op == OP_CHANGE:
3590 action = 'change'
3591 else:
3592 sys.stderr.write("%s: _add_or_change_path() called with bad op ('%s')"
3593 % (error_prefix, op))
3594 sys.exit(1)
3596 # Convenience variables
3597 c_rev = s_item.c_rev
3598 svn_props = s_item.svn_props
3600 # The property handling here takes advantage of an undocumented
3601 # but IMHO consistent feature of the Subversion dumpfile-loading
3602 # code. When a node's properties aren't mentioned (that is, the
3603 # "Prop-content-length:" header is absent, no properties are
3604 # listed at all, and there is no "PROPS-END\n" line) then no
3605 # change is made to the node's properties.
3607 # This is consistent with the way dumpfiles behave w.r.t. text
3608 # content changes, so I'm comfortable relying on it. If you
3609 # commit a change to *just* the properties of some node that
3610 # already has text contents from a previous revision, then in the
3611 # dumpfile output for the prop change, no "Text-content-length:"
3612 # nor "Text-content-md5:" header will be present, and the text of
3613 # the file will not be given. But this does not cause the file's
3614 # text to be erased! It simply remains unchanged.
3616 # This works out great for cvs2svn, due to lucky coincidences:
3618 # For files, the only properties we ever set are set in the first
3619 # revision; all other revisions (including on branches) inherit
3620 # from that. After the first revision, we never change file
3621 properties; therefore, there is no need to remember the full set
3622 # of properties on a given file once we've set it.
3624 # For directories, the only property we set is "svn:ignore", and
3625 # while we may change it after the first revision, we always do so
3626 # based on the contents of a ".cvsignore" file -- in other words,
3627 # CVS is doing the remembering for us, so we still don't have to
3628 # preserve the previous value of the property ourselves.
3630 # Calculate the (sorted-by-name) property string and length, if any.
3631 prop_contents = ''
3632 prop_names = svn_props.keys()
3633 prop_names.sort()
3634 for pname in prop_names:
3635 pval = svn_props[pname]
3636 prop_contents = prop_contents + \
3637 'K %d\n%s\nV %d\n%s\n' \
3638 % (len(pname), pname, len(pval), pval)
3639 if prop_contents:
3640 prop_contents = prop_contents + 'PROPS-END\n'
3641 props_len = len(prop_contents)
3642 else:
3643 props_len = 0
3645 props_header = ''
3646 if props_len:
3647 props_header = 'Prop-content-length: %d\n' % props_len
3649 # treat .cvsignore as a directory property
3650 dir_path, basename = os.path.split(c_rev.svn_path)
3651 if basename == ".cvsignore":
3652 ignore_vals = generate_ignores(c_rev)
3653 ignore_contents = '\n'.join(ignore_vals)
3654 ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
3655 (len(ignore_contents), ignore_contents))
3656 ignore_contents = ignore_contents + 'PROPS-END\n'
3657 ignore_len = len(ignore_contents)
3659 # write headers, then props
3660 self.dumpfile.write('Node-path: %s\n'
3661 'Node-kind: dir\n'
3662 'Node-action: change\n'
3663 'Prop-content-length: %d\n'
3664 'Content-length: %d\n'
3665 '\n'
3666 '%s'
3667 % (self._utf8_path(dir_path), ignore_len,
3668 ignore_len, ignore_contents))
3670 # If the file has keywords, we must use -kk to prevent CVS/RCS from
3671 # expanding the keywords because they must be unexpanded in the
3672 # repository, or Subversion will get confused.
3673 if s_item.has_keywords:
3674 pipe_cmd, pipe = get_co_pipe(c_rev, '-kk')
3675 else:
3676 pipe_cmd, pipe = get_co_pipe(c_rev)
3678 self.dumpfile.write('Node-path: %s\n'
3679 'Node-kind: file\n'
3680 'Node-action: %s\n'
3681 '%s' # no property header if no props
3682 'Text-content-length: '
3683 % (self._utf8_path(c_rev.svn_path),
3684 action, props_header))
3686 pos = self.dumpfile.tell()
3688 self.dumpfile.write('0000000000000000\n'
3689 'Text-content-md5: 00000000000000000000000000000000\n'
3690 'Content-length: 0000000000000000\n'
3691 '\n')
3693 if prop_contents:
3694 self.dumpfile.write(prop_contents)
3696 # Insert a filter to convert all EOLs to LFs if necessary
3697 if s_item.needs_eol_filter:
3698 data_reader = LF_EOL_Filter(pipe.fromchild)
3699 else:
3700 data_reader = pipe.fromchild
3702 # Insert the rev contents, calculating length and checksum as we go.
3703 checksum = md5.new()
3704 length = 0
3705 while True:
3706 buf = data_reader.read(PIPE_READ_SIZE)
3707 if buf == '':
3708 break
3709 checksum.update(buf)
3710 length = length + len(buf)
3711 self.dumpfile.write(buf)
3713 pipe.fromchild.close()
3714 error_output = pipe.childerr.read()
3715 exit_status = pipe.wait()
3716 if exit_status:
3717 sys.exit("%s: The command '%s' failed with exit status: %s\n"
3718 "and the following output:\n"
3719 "%s" % (error_prefix, pipe_cmd, exit_status, error_output))
3721 # Go back to patch up the length and checksum headers:
3722 self.dumpfile.seek(pos, 0)
3723 # We left 16 zeros for the text length; replace them with the real
3724 # length, padded on the left with spaces:
3725 self.dumpfile.write('%16d' % length)
3726 # 16... + 1 newline + len('Text-content-md5: ') == 35
3727 self.dumpfile.seek(pos + 35, 0)
3728 self.dumpfile.write(checksum.hexdigest())
3729 # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
3730 self.dumpfile.seek(pos + 84, 0)
3731 # The content length is the length of property data, text data,
3732 # and any metadata around/inside them.
3733 self.dumpfile.write('%16d' % (length + props_len))
3734 # Jump back to the end of the stream
3735 self.dumpfile.seek(0, 2)
3737 # This record is done (write two newlines -- one to terminate
3738 # contents that weren't themselves newline-terminated, and one to
3739 # provide a blank line for readability).
3740 self.dumpfile.write('\n\n')
3742 def add_path(self, s_item):
3743 """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
3744 self._add_or_change_path(s_item, OP_ADD)
3746 def change_path(self, s_item):
3747 """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
3748 self._add_or_change_path(s_item, OP_CHANGE)
3750 def delete_path(self, path):
3751 """Emit the deletion of PATH."""
3752 self.dumpfile.write('Node-path: %s\n'
3753 'Node-action: delete\n'
3754 '\n' % self._utf8_path(path))
3756 def copy_path(self, src_path, dest_path, src_revnum):
3757 """Emit the copying of SRC_PATH at SRC_REV to DEST_PATH."""
3758 # We don't need to include "Node-kind:" for copies; the loader
3759 # ignores it anyway and just uses the source kind instead.
3760 self.dumpfile.write('Node-path: %s\n'
3761 'Node-action: add\n'
3762 'Node-copyfrom-rev: %d\n'
3763 'Node-copyfrom-path: /%s\n'
3764 '\n'
3765 % (self._utf8_path(dest_path),
3766 src_revnum,
3767 self._utf8_path(src_path)))
3769 def finish(self):
3770 """Perform any cleanup necessary after all revisions have been
3771 committed."""
3772 self.dumpfile.close()
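
# (Editor's note: a hypothetical helper, not part of cvs2svn, showing the
# 'K <len> / V <len> / PROPS-END' property serialization whose total length
# start_commit() and _add_or_change_path() above compute by hand.  The
# length of the returned string is exactly the Prop-content-length that
# start_commit() derives arithmetically.)
def _example_serialize_props(props):
  """Return PROPS (a hash of property name -> value) in Subversion
  dumpfile property format, skipping properties whose value is None."""
  s = ''
  names = props.keys()
  names.sort()
  for name in names:
    value = props[name]
    if value is None:
      continue
    s = s + 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)
  return s + 'PROPS-END\n'
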
3775 class RepositoryDelegate(DumpfileDelegate):
3776 """Creates a new Subversion Repository. DumpfileDelegate does all
3777 of the heavy lifting."""
3778 def __init__(self):
3779 self.svnadmin = Ctx().svnadmin
3780 self.target = Ctx().target
3781 if not Ctx().existing_svnrepos:
3782 Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
3783 if Ctx().fs_type and Ctx().fs_type != 'bdb':
3784 # User specified something other than bdb.
3785 run_command('%s create %s "%s"' % (self.svnadmin,
3786 "--fs-type=%s" % Ctx().fs_type,
3787 self.target))
3788 elif Ctx().fs_type:
3789 # User explicitly specified bdb.
3791 # Since this is a BDB repository, pass --bdb-txn-nosync,
3792 # because it gives us a 4-5x speed boost (if cvs2svn is
3793 # creating the repository, cvs2svn should be the only program
3794 # accessing the svn repository (until cvs is done, at least)).
3795 # But we'll turn no-sync off in self.finish(), unless
3796 # instructed otherwise.
3797 run_command('%s create %s %s "%s"' % (self.svnadmin,
3798 "--fs-type=bdb",
3799 "--bdb-txn-nosync",
3800 self.target))
3801 else:
3802 # User didn't say what kind of repository (bdb, fsfs, etc.).
3803 # We still pass --bdb-txn-nosync. It's a no-op if the default
3804 # repository type doesn't support it, but we definitely want
3805 # it if BDB is the default.
3806 run_command('%s create %s "%s"' % (self.svnadmin,
3807 "--bdb-txn-nosync",
3808 self.target))
3811 # Since the output of this run is a repository, not a dumpfile,
3812 # the temporary dumpfiles we create should go in the tmpdir.
3813 DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))
3815 # This is 1 if a commit is in progress, otherwise None.
3816 self._commit_in_progress = None
3818 self.dumpfile = open(self.dumpfile_path, 'w+b')
3819 self.loader_pipe = Popen3('%s load -q "%s"' % (self.svnadmin, self.target),
3820 True)
3821 self.loader_pipe.fromchild.close()
3822 try:
3823 self._write_dumpfile_header(self.loader_pipe.tochild)
3824 except IOError:
3825 sys.stderr.write("%s: svnadmin failed with the following output while "
3826 "loading the dumpfile:\n" % (error_prefix))
3827 sys.stderr.write(self.loader_pipe.childerr.read())
3828 sys.exit(1)
3830 def _feed_pipe(self):
3831 """Feed the revision stored in the dumpfile to the svnadmin
3832 load pipe."""
3833 self.dumpfile.seek(0)
3834 while 1:
3835 data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
3836 if not len(data):
3837 break
3838 try:
3839 self.loader_pipe.tochild.write(data)
3840 except IOError:
3841 sys.stderr.write("%s: svnadmin failed with the following output while "
3842 "loading the dumpfile:\n" % (error_prefix))
3843 sys.stderr.write(self.loader_pipe.childerr.read())
3844 sys.exit(1)
3846 def start_commit(self, svn_commit):
3847 """Start a new commit. If a commit is already in progress, close
3848 the dumpfile, load it into the svn repository, open a new
3849 dumpfile, and write the header into it."""
3850 if self._commit_in_progress:
3851 self._feed_pipe()
3852 self.dumpfile.seek(0)
3853 self.dumpfile.truncate()
3854 DumpfileDelegate.start_commit(self, svn_commit)
3855 self._commit_in_progress = 1
3857 def finish(self):
3858 """Loads the last commit into the repository."""
3859 self._feed_pipe()
3860 self.dumpfile.close()
3861 self.loader_pipe.tochild.close()
3862 error_output = self.loader_pipe.childerr.read()
3863 exit_status = self.loader_pipe.wait()
3864 if exit_status:
3865 sys.exit('%s: svnadmin load failed with exit status: %s\n'
3866 'and the following output:\n'
3867 '%s' % (error_prefix, exit_status, error_output))
3868 os.remove(self.dumpfile_path)
3870 # If this is a BDB repository, and we created the repository, and
3871 # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
3872 # line in the DB_CONFIG file, because txn syncing should be on by
3873 # default in BDB repositories.
3875 # We determine if this is a BDB repository by looking for the
3876 # DB_CONFIG file, which doesn't exist in FSFS, rather than by
3877 # checking Ctx().fs_type. That way this code will Do The Right
3878 # Thing in all circumstances.
3879 db_config = os.path.join(self.target, "db/DB_CONFIG")
3880 if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
3881 and os.path.exists(db_config)):
3882 no_sync = 'set_flags DB_TXN_NOSYNC\n'
3884 contents = open(db_config, 'r').readlines()
3885 index = contents.index(no_sync)
3886 contents[index] = '# ' + no_sync
3887 open(db_config, 'w').writelines(contents)
3890 class StdoutDelegate(SVNRepositoryMirrorDelegate):
3891 """Makes no changes to the disk, but writes out information to
3892 STDOUT about what the SVNRepositoryMirror is doing. Of course, our
3893 print statements will state that we're doing something, when in
3894 reality, we aren't doing anything other than printing out that we're
3895 doing something. Kind of zen, really."""
3896 def __init__(self, total_revs):
3897 self.total_revs = total_revs
3899 def start_commit(self, svn_commit):
3900 """Prints out the Subversion revision number of the commit that is
3901 being started."""
3902 Log().write(LOG_VERBOSE, "=" * 60)
3903 Log().write(LOG_NORMAL, "Starting Subversion commit %d / %d" %
3904 (svn_commit.revnum, self.total_revs))
3906 def mkdir(self, path):
3907 """Print a line stating that we are creating directory PATH."""
3908 Log().write(LOG_VERBOSE, " New Directory", path)
3910 def add_path(self, s_item):
3911 """Print a line stating that we are 'adding' s_item.c_rev.svn_path."""
3912 Log().write(LOG_VERBOSE, " Adding", s_item.c_rev.svn_path)
3914 def change_path(self, s_item):
3915 """Print a line stating that we are 'changing' s_item.c_rev.svn_path."""
3916 Log().write(LOG_VERBOSE, " Changing", s_item.c_rev.svn_path)
3918 def delete_path(self, path):
3919 """Print a line stating that we are 'deleting' PATH."""
3920 Log().write(LOG_VERBOSE, " Deleting", path)
3922 def copy_path(self, src_path, dest_path, src_revnum):
3923 """Print a line stating that we are 'copying' revision SRC_REVNUM
3924 of SRC_PATH to DEST_PATH."""
3925 Log().write(LOG_VERBOSE, " Copying revision", src_revnum, "of", src_path)
3926 Log().write(LOG_VERBOSE, " to", dest_path)
3928 def finish(self):
3929 """State that we are done creating our repository."""
3930 Log().write(LOG_VERBOSE, "Finished creating Subversion repository.")
3931 Log().write(LOG_QUIET, "Done.")
3933 # This should be local to pass1,
3934 # but Python 2.0 does not support nested scopes.
3935 OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
3936 def pass1():
3937 Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
3938 cd = CollectData()
3940 def visit_file(baton, dirname, files):
3941 cd = baton
3942 for fname in files:
3943 if fname[-2:] != ',v':
3944 continue
3945 cd.found_valid_file = 1
3946 pathname = os.path.join(dirname, fname)
3947 if dirname[-6:] == OS_SEP_PLUS_ATTIC:
3948 # drop the 'Attic' portion from the pathname for the canonical name.
3949 cd.set_fname(os.path.join(dirname[:-6], fname), pathname)
3950 else:
3951 # If this file also exists in the attic, it's a fatal error
3952 attic_path = os.path.join(dirname, 'Attic', fname)
3953 if os.path.exists(attic_path):
3954 err = "%s: A CVS repository cannot contain both %s and %s" \
3955 % (error_prefix, pathname, attic_path)
3956 sys.stderr.write(err + '\n')
3957 cd.fatal_errors.append(err)
3958 cd.set_fname(pathname, pathname)
3959 Log().write(LOG_NORMAL, pathname)
3960 try:
3961 cvs2svn_rcsparse.parse(open(pathname, 'rb'), cd)
3962 except (cvs2svn_rcsparse.common.RCSParseError, ValueError, RuntimeError):
3963 err = "%s: '%s' is not a valid ,v file" \
3964 % (error_prefix, pathname)
3965 sys.stderr.write(err + '\n')
3966 cd.fatal_errors.append(err)
3967 except:
3968 Log().write(LOG_WARN, "Exception occurred while parsing %s" % pathname)
3969 raise
3971 os.path.walk(Ctx().cvsroot, visit_file, cd)
3972 Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')
3974 cd.write_symbol_db()
3976 if len(cd.fatal_errors) > 0:
3977 sys.exit("Pass 1 complete.\n" + "=" * 75 + "\n"
3978 + "Error summary:\n"
3979 + "\n".join(cd.fatal_errors)
3980 + "\nExited due to fatal error(s).")
3982 if cd.found_valid_file is None:
3983 sys.exit("\nNo RCS files found in your CVS Repository!\n"
3984 + "Are you absolutely certain you are pointing cvs2svn\n"
3985 + "at a CVS repository?\n"
3986 + "\nExited due to fatal error(s).")
3988 StatsKeeper().reset_c_rev_info()
3989 StatsKeeper().archive()
3990 Log().write(LOG_QUIET, "Done")
3992 def pass2():
3993 "Pass 2: clean up the revision information."
3995 symbol_db = SymbolDatabase()
3996 symbol_db.read()
3998 # Expand the list of exclusion regexps into the set of matching symbol names
3999 excludes = symbol_db.find_excluded_symbols(Ctx().excludes)
4001 error_detected = 0
4003 Log().write(LOG_QUIET, "Checking for blocked exclusions...")
4004 blocked_excludes = symbol_db.find_blocked_excludes(excludes)
4005 if blocked_excludes:
4006 for branch, blockers in blocked_excludes.items():
4007 sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
4008 "excluded because the following symbols depend "
4009 "on it:\n" % (branch))
4010 for blocker in blockers:
4011 sys.stderr.write(" '%s'\n" % (blocker))
4012 sys.stderr.write("\n")
4013 error_detected = 1
4015 Log().write(LOG_QUIET, "Checking for forced tags with commits...")
4016 invalid_forced_tags = [ ]
4017 for forced_tag in Ctx().forced_tags:
4018 if excludes.has_key(forced_tag):
4019 continue
4020 if symbol_db.branch_has_commit(forced_tag):
4021 invalid_forced_tags.append(forced_tag)
4022 if invalid_forced_tags:
4023 sys.stderr.write(error_prefix + ": The following branches cannot be "
4024 "forced to be tags because they have commits:\n")
4025 for tag in invalid_forced_tags:
4026 sys.stderr.write(" '%s'\n" % (tag))
4027 sys.stderr.write("\n")
4028 error_detected = 1
4030 Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
4031 mismatches = symbol_db.find_mismatches(excludes)
4032 def is_not_forced(mismatch):
4033 name = mismatch[0]
4034 return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
4035 mismatches = filter(is_not_forced, mismatches)
4036 if mismatches:
4037 sys.stderr.write(error_prefix + ": The following symbols are tags "
4038 "in some files and branches in others.\nUse "
4039 "--force-tag, --force-branch and/or --exclude to "
4040 "resolve the symbols.\n")
4041 for name, tag_count, branch_count, commit_count in mismatches:
4042 sys.stderr.write(" '%s' is a tag in %d files, a branch in "
4043 "%d files and has commits in %d files.\n"
4044 % (name, tag_count, branch_count, commit_count))
4045 error_detected = 1
4047 # Bail out now if we found errors
4048 if error_detected:
4049 sys.exit(1)
4051 # Create the tags database
4052 tags_db = TagsDatabase(DB_OPEN_NEW)
4053 for tag in symbol_db.tags.keys():
4054 if tag not in Ctx().forced_branches:
4055 tags_db[tag] = None
4056 for tag in Ctx().forced_tags:
4057 tags_db[tag] = None
4059 Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")
4061 # We may have recorded some changes in revisions' timestamp. We need to
4062 # scan for any other files which may have had the same log message and
4063 # occurred at "the same time" and change their timestamps, too.
4065 # read the resync data file
4066 def read_resync(fname):
4067 "Read the .resync file into memory."
4069 ### note that we assume that we can hold the entire resync file in
4070 ### memory. really large repositories with whacky timestamps could
4071 ### bust this assumption. should that ever happen, then it is possible
4072 ### to split the resync file into pieces and make multiple passes,
4073 ### using each piece.
4076 # A digest maps to a sequence of lists which specify a lower and upper
4077 # time bound for matching up the commit. We keep a sequence of these
4078 # because a number of checkins with the same log message (e.g. an empty
4079 # log message) could need to be remapped. We also make them a list because
4080 # we will dynamically expand the lower/upper bound as we find commits
4081 # that fall into a particular msg and time range.
4083 # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
4085 resync = { }
4087 for line in fileinput.FileInput(fname):
4088 t1 = int(line[:8], 16)
4089 digest = line[9:DIGEST_END_IDX]
4090 t2 = int(line[DIGEST_END_IDX+1:], 16)
4091 t1_l = t1 - COMMIT_THRESHOLD/2
4092 t1_u = t1 + COMMIT_THRESHOLD/2
4093 if resync.has_key(digest):
4094 resync[digest].append([t1_l, t1_u, t2])
4095 else:
4096 resync[digest] = [ [t1_l, t1_u, t2] ]
4098 # For each digest, sort the resync items in it in increasing order,
4099 # based on the lower time bound.
4100 digests = resync.keys()
4101 for digest in digests:
4102 (resync[digest]).sort()
4104 return resync
4106 resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))
4108 output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
4109 Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)
4111 # process the revisions file, looking for items to clean up
4112 for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
4113 c_rev = CVSRevision(Ctx(), line[:-1])
4115 # Skip this entire revision if it's on an excluded branch
4116 if excludes.has_key(c_rev.branch_name):
4117 continue
4119 # Remove all references to excluded tags and branches
4120 def not_excluded(symbol, excludes=excludes):
4121 return not excludes.has_key(symbol)
4122 c_rev.branches = filter(not_excluded, c_rev.branches)
4123 c_rev.tags = filter(not_excluded, c_rev.tags)
4125 # Convert all branches that are forced to be tags
4126 for forced_tag in Ctx().forced_tags:
4127 if forced_tag in c_rev.branches:
4128 c_rev.branches.remove(forced_tag)
4129 c_rev.tags.append(forced_tag)
4131 # Convert all tags that are forced to be branches
4132 for forced_branch in Ctx().forced_branches:
4133 if forced_branch in c_rev.tags:
4134 c_rev.tags.remove(forced_branch)
4135 c_rev.branches.append(forced_branch)
4137 # see if this is "near" any of the resync records we
4138 # have recorded for this digest [of the log message].
4139 for record in resync.get(c_rev.digest, []):
4140 if record[0] <= c_rev.timestamp <= record[1]:
4141 # bingo! remap the time on this (record[2] is the new time).
4143 # adjust the time range. we want the COMMIT_THRESHOLD from the
4144 # bounds of the earlier/latest commit in this group.
4145 record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
4146 record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)
4148 # By default this will be the new timestamp
4149 new_timestamp = record[2]
4150 # If the new timestamp is earlier than that of our previous revision
4151 if record[2] < c_rev.prev_timestamp:
4152 desc = ("%s: Attempt to set timestamp of revision %s on file %s"
4153 + " to time %s, which is before previous the time of"
4154 + " revision %s (%s):")
4155 Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
4156 c_rev.cvs_path, record[2],
4157 c_rev.prev_rev, c_rev.prev_timestamp))
4158 # If resyncing our rev to c_rev.prev_timestamp + 1 will place
4159 # the timestamp of c_rev within COMMIT_THRESHOLD of the
4160 # attempted sync time, then sync back to c_rev.prev_timestamp
4161 # + 1...
4162 if (c_rev.prev_timestamp - record[2]) < COMMIT_THRESHOLD:
4163 new_timestamp = c_rev.prev_timestamp + 1
4164 Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
4165 new_timestamp))
4166 # ...otherwise, make no change
4167 else:
4168 new_timestamp = c_rev.timestamp
4169 Log().write(LOG_WARN, "%s: Timestamp left untouched" %
4170 warning_prefix)
4172 msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
4173 % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
4174 record[2] - c_rev.timestamp)
4175 Log().write(LOG_VERBOSE, msg)
4177 c_rev.timestamp = new_timestamp
4179 # stop looking for hits
4180 break
4182 output.write(str(c_rev) + "\n")
4183 Log().write(LOG_QUIET, "Done")
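
# (Editor's note: a toy illustration, not part of cvs2svn, of the resync
# structure documented inside pass2().  For a given log-message digest,
# each record is [old_time_lower, old_time_upper, new_time]; a revision
# whose timestamp falls inside the window is moved to new_time, otherwise
# it is left alone.  The real loop in pass2() additionally widens the
# window and guards against moving a revision before its predecessor.)
def _example_resync_timestamp(resync, digest, timestamp):
  for record in resync.get(digest, []):
    if record[0] <= timestamp <= record[1]:
      return record[2]
  return timestamp
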
4185 def pass3():
4186 Log().write(LOG_QUIET, "Sorting CVS revisions...")
4187 sort_file(temp(DATAFILE + CLEAN_REVS_SUFFIX),
4188 temp(DATAFILE + SORTED_REVS_SUFFIX))
4189 Cleanup().register(temp(DATAFILE + SORTED_REVS_SUFFIX), pass5)
4190 Log().write(LOG_QUIET, "Done")
4192 def pass4():
4193 """Iterate through sorted revs, storing them in a database.
4194 If we're not doing a trunk-only conversion, generate the
4195 LastSymbolicNameDatabase, which contains the last CVSRevision
4196 that is a source for each tag or branch."""
4198 Log().write(LOG_QUIET,
4199 "Copying CVS revision data from flat file to database...")
4200 cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
4201 if not Ctx().trunk_only:
4202 Log().write(LOG_QUIET,
4203 "and finding last CVS revisions for all symbolic names...")
4204 last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)
4205 else:
4206 # This is to avoid testing Ctx().trunk_only every time around the loop
4207 class DummyLSNDB:
4208 def noop(*args): pass
4209 log_revision = noop
4210 create_database = noop
4211 last_sym_name_db = DummyLSNDB()
4213 for line in fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX)):
4214 c_rev = CVSRevision(Ctx(), line[:-1])
4215 cvs_revs_db.log_revision(c_rev)
4216 last_sym_name_db.log_revision(c_rev)
4217 StatsKeeper().record_c_rev(c_rev)
4219 last_sym_name_db.create_database()
4220 StatsKeeper().archive()
4221 Log().write(LOG_QUIET, "Done")
4223 def pass5():
4225 """Generate the SVNCommit <-> CVSRevision mapping
4226 databases. CVSCommit._commit also calls SymbolingsLogger to register
4227 CVSRevisions that represent an opening or closing for a path on a
4228 branch or tag. See SymbolingsLogger for more details."""
4230 Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")
4232 aggregator = CVSRevisionAggregator()
4233 for line in fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX)):
4234 c_rev = CVSRevision(Ctx(), line[:-1])
4235 if not (Ctx().trunk_only and c_rev.branch_name is not None):
4236 aggregator.process_revision(c_rev)
4237 aggregator.flush()
4239 StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
4240 StatsKeeper().archive()
4241 Log().write(LOG_QUIET, "Done")
4243 def pass6():
4244 Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")
4246 if not Ctx().trunk_only:
4247 sort_file(temp(SYMBOL_OPENINGS_CLOSINGS),
4248 temp(SYMBOL_OPENINGS_CLOSINGS_SORTED))
4249 Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), pass8)
4250 Log().write(LOG_QUIET, "Done")
4252 def pass7():
4253 Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")
4255 def generate_offsets_for_symbolings():
4256 """This function iterates through all the lines in
4257 SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
4258 SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
4259 where SYMBOLIC_NAME is first encountered. This will allow us to
4260 seek to the various offsets in the file and sequentially read only
4261 the openings and closings that we need."""
4263 ###PERF This is a fine example of a db that can be in-memory and
4264 #just flushed to disk when we're done. Later, it can just be sucked
4265 #back into memory.
4266 offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
4267 Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)
4269 file = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
4270 old_sym = ""
4271 while 1:
4272 fpos = file.tell()
4273 line = file.readline()
4274 if not line:
4275 break
4276 sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
4277 if not sym == old_sym:
4278 Log().write(LOG_VERBOSE, " ", sym)
4279 old_sym = sym
4280 offsets_db[sym] = fpos
4282 if not Ctx().trunk_only:
4283 generate_offsets_for_symbolings()
4284 Log().write(LOG_QUIET, "Done.")
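
# (Editor's note: a hypothetical sketch, not part of cvs2svn, of how the
# offsets recorded by pass7 can be used: seek straight to the first line
# for one symbolic name in the sorted openings/closings file and read its
# lines sequentially.  This is presumably the pattern SymbolingsReader
# relies on; the helper and its arguments are illustrative only.)
def _example_read_symbol_openings(sorted_filename, offsets_db, name):
  f = open(sorted_filename, 'r')
  f.seek(int(offsets_db[name]))
  lines = [ ]
  while 1:
    line = f.readline()
    # Each line starts with the symbolic name (see pass7 above).
    if not line or line.split(" ", 1)[0] != name:
      break
    lines.append(line)
  f.close()
  return lines
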
4286 def pass8():
4287 svncounter = 2 # Repository initialization is 1.
4288 repos = SVNRepositoryMirror()
4289 persistence_manager = PersistenceManager(DB_OPEN_READ)
4291 if (Ctx().target):
4292 if not Ctx().dry_run:
4293 repos.add_delegate(RepositoryDelegate())
4294 Log().write(LOG_QUIET, "Starting Subversion Repository.")
4295 else:
4296 if not Ctx().dry_run:
4297 repos.add_delegate(DumpfileDelegate())
4298 Log().write(LOG_QUIET, "Starting Subversion Dumpfile.")
4300 repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))
4302 while(1):
4303 svn_commit = persistence_manager.get_svn_commit(svncounter)
4304 if not svn_commit:
4305 break
4306 repos.commit(svn_commit)
4307 svncounter += 1
4309 repos.finish()
4311 _passes = [
4312 pass1,
4313 pass2,
4314 pass3,
4315 pass4,
4316 pass5,
4317 pass6,
4318 pass7,
4319 pass8,
4320 ]
4323 class Ctx:
4324 """Session state for this run of cvs2svn. For example, run-time
4325 options are stored here. This class is a Borg, see
4326 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
4328 __shared_state = { }
4329 def __init__(self):
4330 self.__dict__ = self.__shared_state
4331 if self.__dict__:
4332 return
4333 # Else, initialize to defaults.
4334 self.cvsroot = None
4335 self.target = None
4336 self.dumpfile = DUMPFILE
4337 self.tmpdir = '.'
4338 self.verbose = 0
4339 self.quiet = 0
4340 self.prune = 1
4341 self.existing_svnrepos = 0
4342 self.dump_only = 0
4343 self.dry_run = 0
4344 self.trunk_only = 0
4345 self.trunk_base = "trunk"
4346 self.tags_base = "tags"
4347 self.branches_base = "branches"
4348 self.encoding = "ascii"
4349 self.mime_types_file = None
4350 self.mime_mapper = None
4351 self.no_default_eol = 0
4352 self.eol_from_mime_type = 0
4353 self.keywords_off = 0
4354 self.use_cvs = None
4355 self.svnadmin = "svnadmin"
4356 self.username = None
4357 self.print_help = 0
4358 self.skip_cleanup = 0
4359 self.cvs_revnums = 0
4360 self.bdb_txn_nosync = 0
4361 self.fs_type = None
4362 self.forced_branches = []
4363 self.forced_tags = []
4364 self.excludes = []
4365 self.symbol_transforms = []
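
# (Editor's note: a hypothetical demonstration, not part of cvs2svn, of the
# Borg pattern used by Ctx above: every instance shares one __dict__, so a
# value set through one instance is visible through all of them.  The
# attribute name used here is made up and the function is never called by
# the program.)
def _example_ctx_is_a_borg():
  a = Ctx()
  b = Ctx()
  a.example_flag = 42     # hypothetical attribute, set via one instance...
  return b.example_flag   # ...and read back via another: returns 42
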

class MimeMapper:
  """A class that provides mappings from file names to MIME types.
  Note that we should really be using Python's 'mimetypes' module.
  See http://cvs2svn.tigris.org/servlets/ReadMsg?list=dev&msgNo=266
  for more."""

  def __init__(self):
    self.mappings = { }

  def set_mime_types_file(self, mime_types_file):
    for line in fileinput.input(mime_types_file):
      if line.startswith("#"):
        continue

      # format of a line is something like
      # text/plain c h cpp
      extensions = line.split()
      if len(extensions) < 2:
        continue
      type = extensions.pop(0)
      for ext in extensions:
        if self.mappings.has_key(ext) and self.mappings[ext] != type:
          sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                           % (warning_prefix, ext, self.mappings[ext], type))
        self.mappings[ext] = type

  def get_type_from_filename(self, filename):
    basename, extension = os.path.splitext(os.path.basename(filename))

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename
    if self.mappings.has_key(extension):
      return self.mappings[extension]
    return None
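
# Illustrative sketch only (the mime.types line and file names below are
# hypothetical): given a mime.types file containing the line
#
#   text/x-csrc c
#
# set_mime_types_file() records {'c': 'text/x-csrc'}, so
# get_type_from_filename('src/main.c') returns 'text/x-csrc'.  A line such
# as "text/plain README" would likewise cover the extensionless file
# README via the basename fallback above.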

def convert(start_pass, end_pass):
  "Convert a CVS repository to an SVN repository."

  cleanup = Cleanup()
  times = [ None ] * (end_pass + 1)
  times[start_pass - 1] = time.time()
  StatsKeeper().set_start_time(time.time())
  for i in range(start_pass - 1, end_pass):
    Log().write(LOG_QUIET, '----- pass %d -----' % (i + 1))
    _passes[i]()
    times[i + 1] = time.time()
    StatsKeeper().log_duration_for_pass(times[i + 1] - times[i], i + 1)
    # Dispose of items in Ctx() not intended to live past the end of the pass
    # (identified by exactly one leading underscore).
    for attr in dir(Ctx()):
      if (len(attr) > 2 and attr[0] == '_' and attr[1] != '_'
          and not attr[:6] == "_Ctx__"):
        delattr(Ctx(), attr)
    if not Ctx().skip_cleanup:
      cleanup.cleanup(_passes[i])
  StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET, '(These are unaltered CVS repository stats and do not\n'
                + ' reflect tags or branches excluded via --exclude)\n')
  print StatsKeeper().timings()

def usage():
  print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
        % os.path.basename(sys.argv[0])
  print '  --help, -h           print this usage message and exit with success'
  print '  --version            print the version number'
  print '  -q                   quiet'
  print '  -v                   verbose'
  print '  -s PATH              path for SVN repos'
  print '  -p START[:END]       start at pass START, end at pass END of %d' % len(_passes)
  print '                       If only START is given, run only pass START'
  print '                       (implicitly enables --skip-cleanup)'
  print '  --existing-svnrepos  load into existing SVN repository'
  print '  --dumpfile=PATH      name of intermediate svn dumpfile'
  print '  --tmpdir=PATH        directory to use for tmp data (default to cwd)'
  print '  --profile            profile with \'hotshot\' (into file cvs2svn.hotshot)'
  print '  --dry-run            do not create a repository or a dumpfile;'
  print '                       just print what would happen.'
  print '  --use-cvs            use CVS instead of RCS \'co\' to extract data'
  print '                       (only use this if having problems with RCS)'
  print '  --svnadmin=PATH      path to the svnadmin program'
  print '  --trunk-only         convert only trunk commits, not tags nor branches'
  print '  --trunk=PATH         path for trunk (default: %s)' \
        % Ctx().trunk_base
  print '  --branches=PATH      path for branches (default: %s)' \
        % Ctx().branches_base
  print '  --tags=PATH          path for tags (default: %s)' \
        % Ctx().tags_base
  print '  --no-prune           don\'t prune empty directories'
  print '  --dump-only          just produce a dumpfile, don\'t commit to a repos'
  print '  --encoding=ENC       encoding of log messages in CVS repos (default: %s)' \
        % Ctx().encoding
  print '  --force-branch=NAME  force NAME to be a branch'
  print '  --force-tag=NAME     force NAME to be a tag'
  print '  --exclude=REGEXP     exclude branches and tags matching REGEXP'
  print '  --symbol-transform=P:S  transform symbol names from P to S where P and S'
  print '                       use Python regexp and reference syntax respectively'
  print '  --username=NAME      username for cvs2svn-synthesized commits'
  print '  --skip-cleanup       prevent the deletion of intermediate files'
  print '  --bdb-txn-nosync     pass --bdb-txn-nosync to "svnadmin create"'
  print '  --fs-type=TYPE       pass --fs-type=TYPE to "svnadmin create"'
  print '  --cvs-revnums        record CVS revision numbers as file properties'
  print '  --mime-types=FILE    specify an apache-style mime.types file for\n' \
        '                       setting svn:mime-type'
  print '  --eol-from-mime-type set svn:eol-style by mime type (only with --mime-types)'
  print '  --no-default-eol     don\'t set svn:eol-style by CVS defaults'
  print '  --keywords-off       don\'t set svn:keywords on any files (cvs2svn sets'
  print '                       svn:keywords to "author date id" on non-binary files'
  print '                       by default)'

def main():
  # Convenience var, so we don't have to keep instantiating this Borg.
  ctx = Ctx()

  profiling = None
  start_pass = 1
  end_pass = len(_passes)

  try:
    opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
                               [ "help", "create", "trunk=",
                                 "username=", "existing-svnrepos",
                                 "branches=", "tags=", "encoding=",
                                 "force-branch=", "force-tag=", "exclude=",
                                 "use-cvs", "mime-types=",
                                 "eol-from-mime-type", "no-default-eol",
                                 "trunk-only", "no-prune", "dry-run",
                                 "dump-only", "dumpfile=", "tmpdir=",
                                 "svnadmin=", "skip-cleanup", "cvs-revnums",
                                 "bdb-txn-nosync", "fs-type=",
                                 "version", "profile",
                                 "keywords-off", "symbol-transform="])
  except getopt.GetoptError, e:
    sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
    usage()
    sys.exit(1)

  for opt, value in opts:
    if opt == '--version':
      print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
      sys.exit(0)
    elif opt == '-p':
      # Don't cleanup if we're doing incrementals.
      ctx.skip_cleanup = 1
      if value.find(':') > 0:
        start_pass, end_pass = map(int, value.split(':'))
      else:
        end_pass = start_pass = int(value)
      if start_pass > len(_passes) or start_pass < 1:
        print '%s: illegal value (%d) for starting pass. ' \
              'must be 1 through %d.' % (error_prefix, int(start_pass),
                                         len(_passes))
        sys.exit(1)
      if end_pass < start_pass or end_pass > len(_passes):
        print '%s: illegal value (%d) for ending pass. ' \
              'must be %d through %d.' % (error_prefix, int(end_pass),
                                          int(start_pass), len(_passes))
        sys.exit(1)
    elif (opt == '--help') or (opt == '-h'):
      ctx.print_help = 1
    elif opt == '-v':
      Log().log_level = LOG_VERBOSE
      ctx.verbose = 1
    elif opt == '-q':
      Log().log_level = LOG_QUIET
      ctx.quiet = 1
    elif opt == '-s':
      ctx.target = value
    elif opt == '--existing-svnrepos':
      ctx.existing_svnrepos = 1
    elif opt == '--dumpfile':
      ctx.dumpfile = value
    elif opt == '--tmpdir':
      ctx.tmpdir = value
    elif opt == '--use-cvs':
      ctx.use_cvs = 1
    elif opt == '--svnadmin':
      ctx.svnadmin = value
    elif opt == '--trunk-only':
      ctx.trunk_only = 1
    elif opt == '--trunk':
      if not value:
        sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
      ctx.trunk_base = value
    elif opt == '--branches':
      if not value:
        sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
      ctx.branches_base = value
    elif opt == '--tags':
      if not value:
        sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
      ctx.tags_base = value
    elif opt == '--no-prune':
      ctx.prune = None
    elif opt == '--dump-only':
      ctx.dump_only = 1
    elif opt == '--dry-run':
      ctx.dry_run = 1
    elif opt == '--encoding':
      ctx.encoding = value
    elif opt == '--force-branch':
      ctx.forced_branches.append(value)
    elif opt == '--force-tag':
      ctx.forced_tags.append(value)
    elif opt == '--exclude':
      try:
        ctx.excludes.append(re.compile('^' + value + '$'))
      except re.error, e:
        sys.exit(error_prefix + ": '%s' is not a valid regexp.\n" % (value))
    elif opt == '--mime-types':
      ctx.mime_types_file = value
    elif opt == '--eol-from-mime-type':
      ctx.eol_from_mime_type = 1
    elif opt == '--no-default-eol':
      ctx.no_default_eol = 1
    elif opt == '--keywords-off':
      ctx.keywords_off = 1
    elif opt == '--username':
      ctx.username = value
    elif opt == '--skip-cleanup':
      ctx.skip_cleanup = 1
    elif opt == '--cvs-revnums':
      ctx.cvs_revnums = 1
    elif opt == '--bdb-txn-nosync':
      ctx.bdb_txn_nosync = 1
    elif opt == '--fs-type':
      ctx.fs_type = value
    elif opt == '--create':
      sys.stderr.write(warning_prefix +
          ': The behaviour produced by the --create option is now the '
          'default,\nand passing the option is deprecated.\n')
    elif opt == '--profile':
      profiling = 1
    elif opt == '--symbol-transform':
      ctx.symbol_transforms.append(value.split(":"))

  if ctx.print_help:
    usage()
    sys.exit(0)

  # Consistency check for options and arguments.
  if len(args) == 0:
    usage()
    sys.exit(1)

  if len(args) > 1:
    sys.stderr.write(error_prefix +
                     ": must pass only one CVS repository.\n")
    usage()
    sys.exit(1)

  ctx.cvsroot = args[0]

  if not os.path.isdir(ctx.cvsroot):
    sys.stderr.write(error_prefix +
                     ": the given CVS repository path '%s' is not an "
                     "existing directory.\n" % ctx.cvsroot)
    sys.exit(1)

  if ctx.use_cvs:
    # Ascend above the specified root if necessary, to find the
    # cvs_repository (a directory containing a CVSROOT directory) and the
    # cvs_module (the path of the conversion root within the CVS repository).
    # NB: cvs_module must be separated by '/', *not* by os.sep.
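    #
    # For example (hypothetical layout): starting from
    # /home/cvs/project/subdir, where /home/cvs contains a CVSROOT
    # directory, the loop below ends with cvs_repository == '/home/cvs'
    # and cvs_module == 'project/subdir/'.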
    ctx.cvs_repository = os.path.abspath(ctx.cvsroot)
    prev_cvs_repository = None
    ctx.cvs_module = ""
    while prev_cvs_repository != ctx.cvs_repository:
      if os.path.isdir(os.path.join(ctx.cvs_repository, 'CVSROOT')):
        break
      prev_cvs_repository = ctx.cvs_repository
      ctx.cvs_repository, module_component = os.path.split(ctx.cvs_repository)
      ctx.cvs_module = module_component + "/" + ctx.cvs_module
    else:
      # Hit the root (of the drive, on Windows) without finding a CVSROOT dir.
      sys.stderr.write(error_prefix +
                       ": the path '%s' is not a CVS repository, nor a path "
                       "within a CVS repository.  A CVS repository contains "
                       "a CVSROOT directory within its root directory.\n"
                       % ctx.cvsroot)
      sys.exit(1)
    os.environ['CVSROOT'] = ctx.cvs_repository

  if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
    sys.stderr.write(error_prefix +
                     ": must pass one of '-s' or '--dump-only'.\n")
    sys.exit(1)

  def not_both(opt1val, opt1name, opt2val, opt2name):
    if opt1val and opt2val:
      sys.stderr.write(error_prefix + ": cannot pass both '%s' and '%s'.\n"
                       % (opt1name, opt2name))
      sys.exit(1)

  not_both(ctx.target, '-s',
           ctx.dump_only, '--dump-only')

  not_both(ctx.dump_only, '--dump-only',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.dump_only, '--dump-only',
           ctx.bdb_txn_nosync, '--bdb-txn-nosync')

  not_both(ctx.quiet, '-q',
           ctx.verbose, '-v')

  not_both(ctx.fs_type, '--fs-type',
           ctx.existing_svnrepos, '--existing-svnrepos')

  if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
    sys.stderr.write(error_prefix +
                     ": cannot pass --bdb-txn-nosync with --fs-type=%s.\n"
                     % ctx.fs_type)
    sys.exit(1)

  if ((string.find(ctx.trunk_base, '/') > -1)
      or (string.find(ctx.tags_base, '/') > -1)
      or (string.find(ctx.branches_base, '/') > -1)):
    sys.stderr.write("%s: cannot pass multicomponent path to "
                     "--trunk, --tags, or --branches yet.\n"
                     "  See http://cvs2svn.tigris.org/issues/show_bug.cgi?"
                     "id=7 for details.\n" % error_prefix)
    sys.exit(1)

  if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
    sys.stderr.write(error_prefix +
                     ": the svn-repos-path '%s' is not an "
                     "existing directory.\n" % ctx.target)
    sys.exit(1)

  if not ctx.dump_only and not ctx.existing_svnrepos \
     and (not ctx.dry_run) and os.path.exists(ctx.target):
    sys.stderr.write(error_prefix +
                     ": the svn-repos-path '%s' exists.\nRemove it, or pass "
                     "'--existing-svnrepos'.\n" % ctx.target)
    sys.exit(1)

  if ctx.mime_types_file:
    ctx.mime_mapper = MimeMapper()
    ctx.mime_mapper.set_mime_types_file(ctx.mime_types_file)

  # Make sure the tmp directory exists.  Note that we don't check if
  # it's empty -- we want to be able to use, for example, "." to hold
  # tempfiles.  But if we *did* want to check if it were empty, we'd do
  # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
  if not os.path.exists(ctx.tmpdir):
    os.mkdir(ctx.tmpdir)
  elif not os.path.isdir(ctx.tmpdir):
    sys.stderr.write(error_prefix +
                     ": cvs2svn tried to use '%s' for temporary files, but that path\n"
                     "  exists and is not a directory.  Please make it be a directory,\n"
                     "  or specify some other directory for temporary files.\n"
                     % ctx.tmpdir)
    sys.exit(1)

  if ctx.use_cvs:
    def cvs_ok():
      pipe = Popen3('cvs %s --version' % Ctx().cvs_global_arguments, True)
      pipe.tochild.close()
      pipe.fromchild.read()
      errmsg = pipe.childerr.read()
      status = pipe.wait()
      ok = len(errmsg) == 0 and status == 0
      return (ok, status, errmsg)

    ctx.cvs_global_arguments = "-q -R"
    ok, cvs_exitstatus, cvs_errmsg = cvs_ok()
    if not ok:
      ctx.cvs_global_arguments = "-q"
      ok, cvs_exitstatus, cvs_errmsg = cvs_ok()

    if not ok:
      sys.stderr.write(error_prefix +
                       ": error executing CVS: status %s, error output:\n"
                       % (cvs_exitstatus) + cvs_errmsg)
      sys.exit(1)

  # But do lock the tmpdir, to avoid process clash.
  try:
    os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
  except OSError, e:
    if e.errno == errno.EACCES:
      sys.stderr.write(error_prefix + ": Permission denied:"
                       + " No write access to output directory.\n")
      sys.exit(1)
    if e.errno == errno.EEXIST:
      sys.stderr.write(error_prefix +
                       ": cvs2svn is using directory '%s' for temporary files, but\n"
                       "  subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
                       "  cvs2svn process is currently using '%s' as its temporary\n"
                       "  workspace.  If you are certain that is not the case,\n"
                       "  then remove the '%s/cvs2svn.lock' subdirectory.\n"
                       % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir))
      sys.exit(1)
    raise

  try:
    if profiling:
      import hotshot
      prof = hotshot.Profile('cvs2svn.hotshot')
      prof.runcall(convert, start_pass, end_pass)
      prof.close()
    else:
      convert(start_pass, end_pass)
  finally:
    try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
    except: pass

if __name__ == '__main__':
  main()