Fix up help message w.r.t. exact value of svn:keywords.
[cvs2svn.git] / cvs2svn
blobb0bdd1d94d174a422ade4e5be687cc9bde16a45e
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import string
30 import getopt
31 import stat
32 import md5
33 import marshal
34 import errno
35 import popen2
# Warnings and errors start with these strings. They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = "WARNING"
error_prefix = "ERROR"

# Make sure this Python is recent enough.  The message starts directly
# with error_prefix (no stray leading quote), so it reads "ERROR: ...".
if sys.hexversion < 0x2000000:
  sys.stderr.write("%s: Python 2.0 or higher required, "
                   "see www.python.org.\n" % error_prefix)
  sys.exit(1)
48 # Pretend we have true booleans on older python versions
49 try:
50 True
51 except:
52 True = 1
53 False = 0
# Minimal, incomplete, version of popen2.Popen3 for those platforms
# for which popen2 does not provide it.
if hasattr(popen2, 'Popen3'):
  Popen3 = popen2.Popen3
else:
  class Popen3:
    """Stand-in exposing the fromchild, tochild and childerr pipes and
    a wait() method, built on top of popen2.popen3()."""

    def __init__(self, cmd, capturestderr):
      """Start CMD.  A CMD that is not a str is joined with spaces
      first.  CAPTURESTDERR is accepted but not consulted; childerr is
      always set from popen2.popen3()."""
      if type(cmd) != str:
        cmd = " ".join(cmd)
      pipes = popen2.popen3(cmd, mode='b')
      self.fromchild, self.tochild, self.childerr = pipes

    def wait(self):
      """Close the pipes in order (fromchild, tochild, childerr),
      stopping at the first close() that returns a true value, and
      return that value; otherwise return the result of the last
      close()."""
      status = self.fromchild.close()
      if not status:
        status = self.tochild.close()
      if not status:
        status = self.childerr.close()
      return status
70 # DBM module selection
72 # 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
73 # so that the dbhash module used by anydbm will use bsddb3.
74 try:
75 import bsddb3
76 sys.modules['bsddb'] = sys.modules['bsddb3']
77 except ImportError:
78 pass
80 # 2. These DBM modules are not good for cvs2svn.
81 import anydbm
82 if (anydbm._defaultmod.__name__ == 'dumbdbm'
83 or anydbm._defaultmod.__name__ == 'dbm'):
84 print 'ERROR: your installation of Python does not contain a suitable'
85 print ' DBM module. This script cannot continue.'
86 print ' to solve: see http://python.org/doc/current/lib/module-anydbm.html'
87 print ' for details.'
88 sys.exit(1)
90 # 3. If we are using the old bsddb185 module, then try prefer gdbm instead.
91 # Unfortunately, gdbm appears not to be trouble free, either.
92 if hasattr(anydbm._defaultmod, 'bsddb') \
93 and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
94 try:
95 gdbm = __import__('gdbm')
96 except ImportError:
97 sys.stderr.write(warning_prefix +
98 ': The version of the bsddb module found '
99 'on your computer has been reported to malfunction on some datasets, '
100 'causing KeyError exceptions. You may wish to upgrade your Python to '
101 'version 2.3 or later.\n')
102 else:
103 anydbm._defaultmod = gdbm
# Compiled patterns for recognising the shape of a CVS revision number.
# (Raw strings are used so the backslashes read as in the regexp itself.)
trunk_rev = re.compile(r'^[0-9]+\.[0-9]+$')
branch_tag = re.compile(r'^[0-9.]+\.0\.[0-9]+$')
vendor_tag = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+$')

SVN_KEYWORDS_VALUE = 'Author Date Id Revision'

# This really only matches standard '1.1.1.*'-style vendor revisions.
# One could conceivably have a file whose default branch is 1.1.3 or
# whatever, or was that at some point in time, with vendor revisions
# 1.1.3.1, 1.1.3.2, etc. But with the default branch gone now (which
# is the only time this regexp gets used), we'd have no basis for
# assuming that the non-standard vendor branch had ever been the
# default branch anyway, so we don't want this to match them anyway.
vendor_revision = re.compile(r'^(1\.1\.1)\.([0-9])+$')
120 # If this run's output is a repository, then (in the tmpdir) we use
121 # a dumpfile of this name for repository loads.
123 # If this run's output is a dumpfile, then this is the default name of
124 # that dumpfile, but in the current directory (unless the user has
125 # specified a dumpfile path, of course, in which case it will be
126 # wherever the user said).
127 DUMPFILE = 'cvs2svn-dump'
129 # This file appears with different suffixes at different stages of
130 # processing. CVS revisions are cleaned and sorted here, for commit
131 # grouping. See design-notes.txt for details.
132 DATAFILE = 'cvs2svn-data'
134 # This file contains a marshalled copy of all the statistics that we
135 # gather throughout the various runs of cvs2svn. The data is stored as a
136 # marshalled dictionary.
137 STATISTICS_FILE = 'cvs2svn-statistics'
139 # This text file contains records (1 per line) that describe svn
140 # filesystem paths that are the opening and closing source revisions
141 # for copies to tags and branches. The format is as follows:
143 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
145 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
146 # SVN_REVNUM are the primary and secondary sorting criteria for
147 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
148 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
149 # A sorted version of the above file.
150 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
152 # This file is a temporary file for storing symbolic_name -> closing
153 # CVSRevision until the end of our pass where we can look up the
154 # corresponding SVNRevNum for the closing revs and write these out to
155 # the SYMBOL_OPENINGS_CLOSINGS.
156 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
158 # Skeleton version of an svn filesystem.
159 # (These supersede and will eventually replace the two above.)
160 # See class SVNRepositoryMirror for how these work.
161 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
162 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
164 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
165 # SYMBOL_OPENINGS_CLOSINGS_SORTED
166 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
168 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
169 # the CVSRevision is the last such that is a source for those symbolic
170 # names. For example, if branch B's number is 1.3.0.2 in this CVS
171 # file, and this file's 1.3 is the latest (by date) revision among
172 # *all* CVS files that is a source for branch B, then the
173 # CVSRevision.unique_key() corresponding to this file at 1.3 would
174 # list at least B in its list.
175 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
177 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
178 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
179 ### the s-revs data in this database.
180 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
182 # Lists all symbolic names that are tags. Keys are strings (symbolic
183 # names), values are ignorable.
184 TAGS_DB = 'cvs2svn-tags.db'
186 # A list of all tags. Each line consists of the tag name and the number
187 # of files in which it exists, separated by a space.
188 TAGS_LIST = 'cvs2svn-tags.txt'
190 # A list of all branches. The file is stored as a plain text file
191 # to make it easy to look at in an editor. Each line contains the
192 # branch name, the number of files where the branch is created, the
193 # commit count, and a list of tags and branches that are defined on
194 # revisions in the branch.
195 BRANCHES_LIST = 'cvs2svn-branches.txt'
197 # These two databases provide a bidirectional mapping between
198 # CVSRevision.unique_key()s and Subversion revision numbers.
200 # The first maps CVSRevision.unique_key() to a number; the values are
201 # not unique.
203 # The second maps a number to a list of CVSRevision.unique_key()s.
204 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
205 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
207 # This database maps svn_revnums to tuples of (symbolic_name, date).
209 # The svn_revnums are the revision numbers of all non-primary
210 # SVNCommits. No primary SVNCommit has a key in this database.
212 # The date is stored for all commits in this database.
214 # For commits that fill symbolic names, the symbolic_name is stored.
215 # For commits that are default branch syncs, the symbolic_name is None.
216 SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'
218 # This database maps svn_revnums of a default branch synchronization
219 # commit to the svn_revnum of the primary SVNCommit that motivated it.
221 # (NOTE: Secondary commits that fill branches and tags also have a
222 # motivating commit, but we do not record it because it is (currently)
223 # not needed for anything.)
225 # This mapping is used when generating the log message for the commit
226 # that synchronizes the default branch with trunk.
227 MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'
229 # How many bytes to read at a time from a pipe. 128 kiB should be
230 # large enough to be efficient without wasting too much memory.
231 PIPE_READ_SIZE = 128 * 1024
233 # Record the default RCS branches, if any, for CVS filepaths.
235 # The keys are CVS filepaths, relative to the top of the repository
236 # and with the ",v" stripped off, so they match the cvs paths used in
237 # Commit.commit(). The values are vendor branch revisions, such as
238 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
239 # represents the highest vendor branch revision thought to have ever
240 # been head of the default branch.
242 # The reason we record a specific vendor revision, rather than a
243 # default branch number, is that there are two cases to handle:
245 # One case is simple. The RCS file lists a default branch explicitly
246 # in its header, such as '1.1.1'. In this case, we know that every
247 # revision on the vendor branch is to be treated as head of trunk at
248 # that point in time.
250 # But there's also a degenerate case. The RCS file does not currently
251 # have a default branch, yet we can deduce that for some period in the
252 # past it probably *did* have one. For example, the file has vendor
253 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
254 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
255 # case, we should record 1.1.1.96 as the last vendor revision to have
256 # been the head of the default branch.
257 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
259 # Records the author and log message for each changeset.
260 # The keys are author+log digests, the same kind used to identify
261 # unique revisions in the .revs, etc files. Each value is a tuple
262 # of two elements: '(author logmessage)'.
263 METADATA_DB = "cvs2svn-metadata.db"
265 REVS_SUFFIX = '.revs'
266 CLEAN_REVS_SUFFIX = '.c-revs'
267 SORTED_REVS_SUFFIX = '.s-revs'
268 RESYNC_SUFFIX = '.resync'
270 SVN_INVALID_REVNUM = -1
272 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
274 # Things that can happen to a file.
275 OP_NOOP = '-'
276 OP_ADD = 'A'
277 OP_DELETE = 'D'
278 OP_CHANGE = 'C'
280 # A deltatext either does or doesn't represent some change.
281 DELTATEXT_NONEMPTY = 'N'
282 DELTATEXT_EMPTY = 'E'
284 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
286 # Constants used in SYMBOL_OPENINGS_CLOSINGS
287 OPENING = 'O'
288 CLOSING = 'C'
def temp(basename):
  """Return the path of BASENAME inside the directory Ctx().tmpdir.

  This is a convenience function to save horizontal space in source."""
  tmpdir = Ctx().tmpdir
  return os.path.join(tmpdir, basename)
295 # Since the unofficial set also includes [/\] we need to translate those
296 # into ones that don't conflict with Subversion limitations.
297 def _clean_symbolic_name(name):
298 """Return symbolic name NAME, translating characters that Subversion
299 does not allow in a pathname."""
300 name = name.replace('/','++')
301 name = name.replace('\\','--')
302 return name
304 def _path_join(*components):
305 """Join two or more pathname COMPONENTS, inserting '/' as needed.
306 Empty component are skipped."""
307 return string.join(filter(None, components), '/')
def run_command(command):
  """Run COMMAND with os.system().  If the returned status is non-zero,
  exit the program with a 'Command failed' message."""
  status = os.system(command)
  if status:
    sys.exit('Command failed: "%s"' % command)
def relative_name(cvsroot, fname):
  """Return FNAME relative to CVSROOT, with os.sep converted to '/'.

  FNAME must begin with CVSROOT.  One os.sep directly after the CVSROOT
  prefix, if present, is dropped.  When FNAME is exactly CVSROOT the
  result is the empty string.  If FNAME does not begin with CVSROOT, an
  error is written to stderr and the program exits with status 1."""
  l = len(cvsroot)
  if fname[:l] == cvsroot:
    # Slice instead of indexing: fname[l] would raise IndexError when
    # FNAME is exactly CVSROOT.
    if fname[l:l+1] == os.sep:
      l = l + 1
    return fname[l:].replace(os.sep, '/')
  sys.stderr.write("%s: relative_name('%s', '%s'): fname is not a sub-path of"
                   " cvsroot\n" % (error_prefix, cvsroot, fname))
  sys.exit(1)
def get_co_pipe(c_rev, extra_arguments=''):
  """Return a command string, and the pipe created using that string.

  C_REV is a CVSRevision, and EXTRA_ARGUMENTS is inserted verbatim into
  the command line.  The command is 'cvs ... co' when Ctx().use_cvs is
  set and RCS 'co' otherwise.  The pipe returns the text of that CVS
  Revision; its tochild end is already closed when this returns."""
  ctx = Ctx()
  if not ctx.use_cvs:
    rcs_file = escape_shell_arg(c_rev.rcs_path())
    pipe_cmd = 'co -q -x,v -p%s %s %s' % \
               (c_rev.rev, extra_arguments, rcs_file)
  else:
    module_path = escape_shell_arg(ctx.cvs_module + c_rev.cvs_path)
    pipe_cmd = 'cvs %s co -r%s -p %s %s' % \
               (ctx.cvs_global_arguments, c_rev.rev, extra_arguments,
                module_path)
  pipe = Popen3(pipe_cmd, True)
  # Nothing is ever sent to the child.
  pipe.tochild.close()
  return pipe_cmd, pipe
def generate_ignores(c_rev):
  """Return the list of ignore patterns contained in the text of C_REV.

  The revision text is fetched through get_co_pipe() and split on
  whitespace.  A lone '!' discards every pattern collected before it.
  If the checkout command reports a non-zero exit status, the program
  exits with an error message that includes the command's stderr."""
  # Read in props
  pipe_cmd, pipe = get_co_pipe(c_rev)
  chunks = [ ]
  while True:
    buf = pipe.fromchild.read(PIPE_READ_SIZE)
    if not buf:
      break
    chunks.append(buf)
  raw_ignore_val = ''.join(chunks)
  pipe.fromchild.close()
  error_output = pipe.childerr.read()
  exit_status = pipe.wait()
  if exit_status:
    sys.exit("%s: The command '%s' failed with exit status: %s\n"
             "and the following output:\n"
             "%s" % (error_prefix, pipe_cmd, exit_status, error_output))

  # Tweak props: split() breaks on any run of whitespace and never
  # yields an empty entry, so there are no blank lines to skip.
  ignore_vals = [ ]
  for ignore in raw_ignore_val.split():
    if ignore == '!':
      # Reset the list if we encounter a '!'
      # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
      ignore_vals = [ ]
    else:
      ignore_vals.append(ignore)
  return ignore_vals
371 # Return a string that has not been returned by gen_key() before.
372 gen_key_base = 0L
373 def gen_key():
374 global gen_key_base
375 key = '%x' % gen_key_base
376 gen_key_base = gen_key_base + 1
377 return key
# ============================================================================
# This code is copied with a few modifications from:
#   subversion/subversion/bindings/swig/python/svn/core.py

if sys.platform == "win32":
  _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')

  def escape_shell_arg(arg):
    """Return ARG quoted as a single command line argument for win32."""
    # The (very strange) parsing rules used by the C runtime library are
    # described at:
    # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp

    # double up slashes, but only if they are followed by a quote character
    # (or by the end of the string, which is about to receive a quote)
    arg = _escape_shell_arg_re.sub(r'\1\1\2', arg)

    # surround by quotes and escape quotes inside
    quoted = arg.replace('"', '"^""')
    return '"' + quoted + '"'


  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """

    # According cmd's usage notes (cmd /?), it parses the command line by
    # "seeing if the first character is a quote character and if so, stripping
    # the leading character and removing the last quote character."
    # So to prevent the argument string from being changed we add an extra set
    # of quotes around it here.
    escaped = map(escape_shell_arg, argv)
    return '"' + " ".join(escaped) + '"'

else:
  def escape_shell_arg(str):
    """Return STR wrapped in single quotes, with each embedded single
    quote rewritten as '\\'' so the whole is one shell word."""
    return "'" + str.replace("'", "'\\''") + "'"

  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """
    return " ".join(map(escape_shell_arg, argv))
# ============================================================================
def format_date(date):
  """Return an svn-compatible date string for DATE (seconds since epoch).

  The time is rendered in UTC with the fractional seconds fixed at
  zero."""
  # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
  utc_fields = time.gmtime(date)
  return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc_fields)
def sort_file(infile, outfile):
  """Sort the lines of INFILE into OUTFILE with the external 'sort'
  command, using Ctx().tmpdir as sort's temporary directory.

  LC_ALL is forced to 'C' while sort runs and is put back to its
  previous state (set or unset) afterwards, even if the command fails."""
  # sort the log files

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so temporarily set it to 'C'.
  lc_all_tmp = os.environ.get('LC_ALL')
  os.environ['LC_ALL'] = 'C'
  try:
    # The -T option to sort has a nice side effect. The Win32 sort is
    # case insensitive and cannot be used, and since it does not
    # understand the -T option and dies if we try to use it, there is
    # no risk that we use that sort by accident.
    run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
  finally:
    # run_command() leaves via SystemExit on failure; restore the
    # caller's environment on that path too.
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp
453 def print_node_tree(tree, root_node, indent_depth=0):
454 """For debugging purposes. Prints all nodes in TREE that are
455 rooted at ROOT_NODE. INDENT_DEPTH is merely for purposes of
456 debugging with the print statement in this function."""
457 if not indent_depth:
458 print "TREE", "=" * 75
459 print "TREE:", " " * (indent_depth * 2), root_node, tree[root_node]
460 for key, value in tree[root_node].items():
461 if key[0] == '/': #Skip flags
462 continue
463 print_node_tree(tree, value, (indent_depth + 1))
def match_regexp_list(regexp_list, string):
  """Return True if STRING is matched (from its start) by at least one
  of the compiled regexps in REGEXP_LIST, else False."""
  for pattern in regexp_list:
    if not pattern.match(string):
      continue
    return True
  return False
class LF_EOL_Filter:
  """Wrap a stream so that every end-of-line marker read from it
  (CRLF, CR or LF) comes out as a single LF."""

  def __init__(self, stream):
    self.stream = stream
    # True when the previous chunk ended in CR and that CR is being
    # held back until we know whether an LF follows it.
    self.carry_cr = False
    self.eof = False

  def read(self, size):
    """Read SIZE bytes from the underlying stream and return them with
    line endings converted.  Returns an empty string only at end of
    stream; the result may be one character longer or shorter than what
    was read because of a held-back CR."""
    while True:
      chunk = self.stream.read(size)
      self.eof = len(chunk) == 0
      if self.carry_cr:
        chunk = '\r' + chunk
        self.carry_cr = False
      if not self.eof and chunk[-1:] == '\r':
        # Might be the first half of a CRLF split across two reads.
        chunk = chunk[:-1]
        self.carry_cr = True
      chunk = chunk.replace('\r\n', '\n').replace('\r', '\n')
      if self.eof or len(chunk) > 0:
        return chunk
# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2
class Log:
  """A simple logging facility.  Each line will be timestamped if
  self.use_timestamps is true.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
  __shared_state = {}
  def __init__(self):
    # Every instance shares one attribute dictionary; only the first
    # instance created fills in the defaults below.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    self.log_level = LOG_NORMAL
    # Set this to true if you want to see timestamps on each line output.
    self.use_timestamps = None
    self.logger = sys.stdout

  def _timestamp(self):
    """Output a detailed timestamp at the beginning of each line output."""
    # %H:%M:%S is 24-hour time.  (%m is the month, not the minute, and
    # %I is a 12-hour clock that is ambiguous without AM/PM.)
    self.logger.write(time.strftime('[%Y-%m-%d %H:%M:%S %Z] - '))

  def write(self, log_level, *args):
    """This is the public method to use for writing to a file.  Only
    messages whose LOG_LEVEL is <= self.log_level will be printed.  If
    there are multiple ARGS, they will be separated by a space."""
    if log_level > self.log_level:
      return
    if self.use_timestamps:
      self._timestamp()
    self.logger.write(' '.join(map(str,args)) + "\n")
    # Ensure that log output doesn't get out-of-order with respect to
    # stderr output.
    self.logger.flush()
class Cleanup:
  """This singleton class manages any files created by cvs2svn.  When
  you first create a file, call Cleanup.register, passing the
  filename, and the last pass that you need the file.  After the end
  of that pass, your file will be cleaned up after running an optional
  callback.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

  __shared_state = {}
  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    # Maps a pass to a dictionary whose keys are the files to delete
    # after that pass.
    self._log = {}
    # Maps a file to the callback to run just before deleting it.
    self._callbacks = {}

  def register(self, file, which_pass, callback=None):
    """Register FILE for cleanup at the end of WHICH_PASS, running
    function CALLBACK prior to removal.  Registering a given FILE is
    idempotent; you may register as many times as you wish, but it
    will only be cleaned up once.

    Note that if a file is registered multiple times, only the first
    callback registered for that file will be called at cleanup
    time.  Also note that if you register a database file you must
    close the database before cleanup, e.g. using a callback."""
    self._log.setdefault(which_pass, {})[file] = 1
    if callback:
      # setdefault() leaves an already registered callback in place.
      self._callbacks.setdefault(file, callback)

  def cleanup(self, which_pass):
    """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
    files = self._log.get(which_pass)
    if files is None:
      return
    for file in files.keys():
      Log().write(LOG_VERBOSE, "Deleting", file)
      callback = self._callbacks.get(file)
      if callback is not None:
        callback()
      os.unlink(file)
# Always use these constants for opening databases.
DB_OPEN_READ = 'r'
DB_OPEN_NEW = 'n'

# A wrapper for anydbm that uses the marshal module to store items as
# strings.
class Database:
  """Dictionary-like wrapper around an anydbm database.  Values are
  run through marshal on the way in and out."""

  def __init__(self, filename, mode):
    """Open the database FILENAME in MODE (as for anydbm.open())."""
    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb - but *not* anydbm in general - the database
    # consists of one file with the name we specify, rather than several
    # based on that name).
    if mode == DB_OPEN_NEW:
      if anydbm._defaultmod.__name__ == 'dbhash':
        if os.path.isfile(filename):
          os.unlink(filename)
        mode = 'c'

    self.db = anydbm.open(filename, mode)

  def has_key(self, key):
    """Return whether KEY is present in the underlying database."""
    return self.db.has_key(key)

  def __getitem__(self, key):
    """Return the unmarshalled value stored under KEY."""
    stored = self.db[key]
    return marshal.loads(stored)

  def __setitem__(self, key, value):
    """Store the marshalled form of VALUE under KEY."""
    self.db[key] = marshal.dumps(value)

  def __delitem__(self, key):
    """Remove KEY from the underlying database."""
    del self.db[key]

  def get(self, key, default):
    """Return the value stored under KEY, or DEFAULT if KEY is absent."""
    if not self.has_key(key):
      return default
    return self[key]
619 class StatsKeeper:
620 __shared_state = { }
621 def __init__(self):
622 self.__dict__ = self.__shared_state
623 if self.__dict__:
624 return
625 self.filename = temp(STATISTICS_FILE)
626 Cleanup().register(self.filename, pass8)
627 # This can get kinda large, so we don't store it in our data dict.
628 self.repos_files = { }
630 if os.path.exists(self.filename):
631 self.unarchive()
632 else:
633 self.data = { 'cvs_revs_count' : 0,
634 'tags': { },
635 'branches' : { },
636 'repos_size' : 0,
637 'repos_file_count' : 0,
638 'svn_rev_count' : None,
639 'first_rev_date' : 1L<<32,
640 'last_rev_date' : 0,
641 'pass_timings' : { },
642 'start_time' : 0,
643 'end_time' : 0,
646 def log_duration_for_pass(self, duration, pass_num):
647 self.data['pass_timings'][pass_num] = duration
649 def set_start_time(self, start):
650 self.data['start_time'] = start
652 def set_end_time(self, end):
653 self.data['end_time'] = end
655 def _bump_item(self, key, amount=1):
656 self.data[key] = self.data[key] + amount
658 def reset_c_rev_info(self):
659 self.data['cvs_revs_count'] = 0
660 self.data['tags'] = { }
661 self.data['branches'] = { }
663 def record_c_rev(self, c_rev):
664 self._bump_item('cvs_revs_count')
666 for tag in c_rev.tags:
667 self.data['tags'][tag] = None
668 for branch in c_rev.branches:
669 self.data['branches'][branch] = None
671 if c_rev.timestamp < self.data['first_rev_date']:
672 self.data['first_rev_date'] = c_rev.timestamp
674 if c_rev.timestamp > self.data['last_rev_date']:
675 self.data['last_rev_date'] = c_rev.timestamp
677 # Only add the size if this is the first time we see the file.
678 if not self.repos_files.has_key(c_rev.fname):
679 self._bump_item('repos_size', c_rev.file_size)
680 self.repos_files[c_rev.fname] = None
682 self.data['repos_file_count'] = len(self.repos_files)
684 def set_svn_rev_count(self, count):
685 self.data['svn_rev_count'] = count
687 def svn_rev_count(self):
688 return self.data['svn_rev_count']
690 def archive(self):
691 open(self.filename, 'w').write(marshal.dumps(self.data))
693 def unarchive(self):
694 self.data = marshal.loads(open(self.filename, 'r').read())
696 def __str__(self):
697 svn_revs_str = ""
698 if self.data['svn_rev_count'] is not None:
699 svn_revs_str = ('Total SVN Commits: %10s\n'
700 % self.data['svn_rev_count'])
702 return ('\n' \
703 'cvs2svn Statistics:\n' \
704 '------------------\n' \
705 'Total CVS Files: %10i\n' \
706 'Total CVS Revisions: %10i\n' \
707 'Total Unique Tags: %10i\n' \
708 'Total Unique Branches: %10i\n' \
709 'CVS Repos Size in KB: %10i\n' \
710 '%s' \
711 'First Revision Date: %s\n' \
712 'Last Revision Date: %s\n' \
713 '------------------' \
714 % (self.data['repos_file_count'],
715 self.data['cvs_revs_count'],
716 len(self.data['tags']),
717 len(self.data['branches']),
718 (self.data['repos_size'] / 1024),
719 svn_revs_str,
720 time.ctime(self.data['first_rev_date']),
721 time.ctime(self.data['last_rev_date']),
724 def timings(self):
725 passes = self.data['pass_timings'].keys()
726 passes.sort()
727 str = 'Timings:\n------------------\n'
729 def desc(val):
730 if val == 1: return "second"
731 return "seconds"
733 for pass_num in passes:
734 duration = int(self.data['pass_timings'][pass_num])
735 p_str = ('pass %d:%6d %s\n'
736 % (pass_num, duration, desc(duration)))
737 str = str + p_str
739 total = int(self.data['end_time'] - self.data['start_time'])
740 str = str + ('total: %6d %s' % (total, desc(total)))
741 return str
class LastSymbolicNameDatabase:
  """Passing every CVSRevision in s-revs to this class will result in
  a Database whose key is the last CVS Revision a symbolic name was
  seen in, and whose value is a list of all symbolic names that were
  last seen in that revision."""
  def __init__(self, mode):
    """Open the backing database in MODE and register it for cleanup."""
    self.symbols = {}
    self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
    Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

  # Once we've gone through all the revs,
  # symbols.keys() will be a list of all tags and branches, and
  # their corresponding values will be a key into the last CVS revision
  # that they were used in.
  def log_revision(self, c_rev):
    """Note C_REV as the latest revision seen for each of its tags and,
    unless C_REV is a delete, for each of its branches."""
    # Gather last CVS Revision for symbolic name info and tag info
    for tag in c_rev.tags:
      self.symbols[tag] = c_rev.unique_key()
    # Compare by value: an identity test on strings only works when both
    # happen to be the same object.
    if c_rev.op != OP_DELETE:
      for branch in c_rev.branches:
        self.symbols[branch] = c_rev.unique_key()

  # Creates an inversion of symbols above--a dictionary of lists (key
  # = CVS rev unique_key: val = list of symbols that close in that
  # rev.
  def create_database(self):
    """Write the inverted mapping (revision key -> list of symbols)
    into the backing database."""
    for sym, rev_unique_key in self.symbols.items():
      ary = self.symbol_revs_db.get(rev_unique_key, [])
      ary.append(sym)
      # Assign back: Database marshals values, so appending to the
      # retrieved list does not by itself change what is stored.
      self.symbol_revs_db[rev_unique_key] = ary
class CVSRevisionDatabase:
  """A Database to store CVSRevision objects and retrieve them by their
  unique_key()."""

  def __init__(self, mode):
    """Initialize an instance, opening database in MODE (like the MODE
    argument to Database or anydbm.open())."""
    self.cvs_revs_db = Database(temp(CVS_REVS_DB), mode)
    Cleanup().register(temp(CVS_REVS_DB), pass8)

  def log_revision(self, c_rev):
    """Add C_REV, a CVSRevision, to the database.  It is stored as its
    str() form under its unique_key()."""
    key = c_rev.unique_key()
    self.cvs_revs_db[key] = str(c_rev)

  def get_revision(self, unique_key):
    """Return the CVSRevision stored under UNIQUE_KEY, rebuilt from the
    stored string with the current Ctx()."""
    stored_line = self.cvs_revs_db[unique_key]
    return CVSRevision(Ctx(), stored_line)
class TagsDatabase(Database):
  """A Database to store which symbolic names are tags.
  Each key is a tag name.
  The value has no meaning, and should be set to None."""
  def __init__(self, mode):
    """Open the tags database in MODE and register its file for
    cleanup."""
    tags_db_path = temp(TAGS_DB)
    Database.__init__(self, tags_db_path, mode)
    Cleanup().register(tags_db_path, pass8)
807 class CVSRevision:
808 def __init__(self, ctx, *args):
809 """Initialize a new CVSRevision with Ctx object CTX, and ARGS.
811 If CTX is None, the following members and methods of the
812 instantiated CVSRevision class object will be unavailable (or
813 simply will not work correctly, if at all):
814 cvs_path
815 svn_path
816 svn_trunk_path
817 is_default_branch_revision()
819 (Note that this class treats CTX as const, because the caller
820 likely passed in a Borg instance of a Ctx. The reason this class
821 takes CTX as as a parameter, instead of just instantiating a Ctx
822 itself, is that this class should be usable outside cvs2svn.)
824 If there is one argument in ARGS, it is a string, in the format of
825 a line from a revs file. Do *not* include a trailing newline.
827 If there are multiple ARGS, there must be 16 of them,
828 comprising a parsed revs line:
829 timestamp --> (int) date stamp for this cvs revision
830 digest --> (string) digest of author+logmsg
831 prev_timestamp --> (int) date stamp for the previous cvs revision
832 op --> (char) OP_ADD, OP_CHANGE, or OP_DELETE
833 prev_rev --> (string or None) previous CVS rev, e.g., "1.2"
834 rev --> (string) this CVS rev, e.g., "1.3"
835 next_rev --> (string or None) next CVS rev, e.g., "1.4"
836 file_in_attic --> (char or None) true if RCS file is in Attic
837 file_executable --> (char or None) true if RCS file has exec bit set.
838 file_size --> (int) size of the RCS file
839 deltatext_code --> (char) 'N' if non-empty deltatext, else 'E'
840 mode --> (string or None) "kkv", "kb", etc.
841 branch_name --> (string or None) branch on which this rev occurred
842 tags --> (list of strings) all tags on this revision
843 branches --> (list of strings) all branches rooted in this rev
844 fname --> (string) relative path of file in CVS repos
846 The two forms of initialization are equivalent."""
848 self._ctx = ctx
849 if len(args) == 16:
850 (self.timestamp, self.digest, self.prev_timestamp, self.op,
851 self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
852 self.file_executable, self.file_size, self.deltatext_code,
853 self.fname,
854 self.mode, self.branch_name, self.tags, self.branches) = args
855 elif len(args) == 1:
856 data = args[0].split(' ', 14)
857 (self.timestamp, self.digest, self.prev_timestamp, self.op,
858 self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
859 self.file_executable, self.file_size, self.deltatext_code,
860 self.mode, self.branch_name, numtags, remainder) = data
861 # Patch up data items which are not simple strings
862 self.timestamp = int(self.timestamp, 16)
863 if self.prev_timestamp == "*":
864 self.prev_timestamp = 0
865 else:
866 self.prev_timestamp = int(self.prev_timestamp)
867 if self.prev_rev == "*":
868 self.prev_rev = None
869 if self.next_rev == "*":
870 self.next_rev = None
871 if self.file_in_attic == "*":
872 self.file_in_attic = None
873 if self.file_executable == "*":
874 self.file_executable = None
875 self.file_size = int(self.file_size)
876 if self.mode == "*":
877 self.mode = None
878 if self.branch_name == "*":
879 self.branch_name = None
880 numtags = int(numtags)
881 tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
882 self.tags = tags_and_numbranches_and_remainder[:-2]
883 numbranches = int(tags_and_numbranches_and_remainder[-2])
884 remainder = tags_and_numbranches_and_remainder[-1]
885 branches_and_fname = remainder.split(' ', numbranches)
886 self.branches = branches_and_fname[:-1]
887 self.fname = branches_and_fname[-1]
888 else:
889 raise TypeError, 'CVSRevision() takes 2 or 16 arguments (%d given)' % \
890 (len(args) + 1)
891 if ctx is not None:
892 self.cvs_path = relative_name(self._ctx.cvsroot, self.fname[:-2])
893 self.svn_path = self._make_path(self.cvs_path, self.branch_name)
894 self.svn_trunk_path = self._make_path(self.cvs_path)
896 # The 'primary key' of a CVS Revision is the revision number + the
897 # filename. To provide a unique key (say, for a dict), we just glom
898 # them together in a string. By passing in self.prev_rev or
899 # self.next_rev, you can get the unique key for their respective
900 # CVSRevisions.
901 def unique_key(self, revnum=None):
902 if revnum is None:
903 revnum = self.rev
904 return revnum + "/" + self.fname
906 def __str__(self):
907 return ('%08lx %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s' % (
908 self.timestamp, self.digest, self.prev_timestamp or "*", self.op,
909 (self.prev_rev or "*"), self.rev, (self.next_rev or "*"),
910 (self.file_in_attic or "*"), (self.file_executable or "*"),
911 self.file_size,
912 self.deltatext_code, (self.mode or "*"), (self.branch_name or "*"),
913 len(self.tags), self.tags and " " or "", " ".join(self.tags),
914 len(self.branches), self.branches and " " or "", " ".join(self.branches),
915 self.fname, ))
917 # Returns true if this CVSRevision is the opening CVSRevision for
918 # NAME (for this RCS file).
919 def opens_symbolic_name(self, name):
920 if name in self.tags:
921 return 1
922 if name in self.branches:
923 # If this c_rev opens a branch and our op is OP_DELETE, then
924 # that means that the file that this c_rev belongs to was
925 # created on the branch, so for all intents and purposes, this
926 # c_rev is *technically* not an opening. See Issue #62 for more
927 # information.
928 if self.op != OP_DELETE:
929 return 1
930 return 0
932 def is_default_branch_revision(self):
933 """Return 1 if SELF.rev of SELF.cvs_path is a default branch
934 revision according to DEFAULT_BRANCHES_DB (see the conditions
935 documented there), else return None."""
936 if self._ctx._default_branches_db.has_key(self.cvs_path):
937 val = self._ctx._default_branches_db[self.cvs_path]
938 val_last_dot = val.rindex(".")
939 our_last_dot = self.rev.rindex(".")
940 default_branch = val[:val_last_dot]
941 our_branch = self.rev[:our_last_dot]
942 default_rev_component = int(val[val_last_dot + 1:])
943 our_rev_component = int(self.rev[our_last_dot + 1:])
944 if (default_branch == our_branch
945 and our_rev_component <= default_rev_component):
946 return 1
947 # else
948 return None
950 def _make_path(self, path, branch_name = None):
951 """Return the trunk path or branch path for PATH.
953 If PATH is None, return None."""
954 # For a while, we treated each top-level subdir of the CVS
955 # repository as a "project root" and interpolated the appropriate
956 # genealogy (trunk|tag|branch) in according to the official
957 # recommended layout. For example, the path '/foo/bar/baz.c' on
958 # branch 'Rel2' would become
960 # /foo/branches/Rel2/bar/baz.c
962 # and on trunk it would become
964 # /foo/trunk/bar/baz.c
966 # However, we went back to the older and simpler method of just
967 # prepending the genealogy to the front, instead of interpolating.
968 # So now we produce:
970 # /branches/Rel2/foo/bar/baz.c
971 # /trunk/foo/bar/baz.c
973 # Why? Well, Jack Repenning pointed out that this way is much
974 # friendlier to "anonymously rooted subtrees" (that's a tree where
975 # the name of the top level dir doesn't matter, the point is that if
976 # you cd into it and, say, run 'make', something good will happen).
977 # By interpolating, we made it impossible to point cvs2svn at some
978 # subdir in the CVS repository and convert it as a project, because
979 # we'd treat every subdir underneath it as an independent project
980 # root, which is probably not what the user wanted.
982 # Also, see Blair Zajac's post
984 # http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=38965
986 # and the surrounding thread, for why what people really want is a
987 # way of specifying an in-repository prefix path, not interpolation.
988 if path is None:
989 return None
991 if branch_name:
992 branch_name = _clean_symbolic_name(branch_name)
993 return self._ctx.branches_base + '/' + branch_name + '/' + path
994 else:
995 return self._ctx.trunk_base + '/' + path
997 def rcs_path(self):
998 """Returns the actual filesystem path to the RCS file of this
999 CVSRevision."""
1000 if self.file_in_attic is None:
1001 return self.fname
1002 else:
1003 basepath, filename = os.path.split(self.fname)
1004 return os.path.join(basepath, 'Attic', filename)
1006 def filename(self):
1007 "Return the last path component of self.fname, minus the ',v'"
1008 return os.path.split(self.fname)[-1][:-2]
class SymbolDatabase:
    """This database records information on all symbols in the RCS
    files.  It is created in pass 1 and it is used in pass 2."""

    def __init__(self):
        # A hash that maps tag names to commit counts
        self.tags = { }
        # A hash that maps branch names to lists of the format
        # [ create_count, commit_count, blockers ], where blockers
        # is a hash that lists the symbols that depend on the
        # branch.  The blockers hash is used as a set, so the
        # values are not used.
        self.branches = { }

    def register_tag_creation(self, name):
        """Register the creation of the tag NAME."""
        self.tags[name] = self.tags.get(name, 0) + 1

    def _branch(self, name):
        """Helper function to get a branch node that will create and
        initialize the node if it does not exist."""
        if name not in self.branches:
            self.branches[name] = [ 0, 0, { } ]
        return self.branches[name]

    def register_branch_creation(self, name):
        """Register the creation of the branch NAME."""
        self._branch(name)[0] += 1

    def register_branch_commit(self, name):
        """Register a commit on the branch NAME."""
        self._branch(name)[1] += 1

    def register_branch_blocker(self, name, blocker):
        """Register BLOCKER as a blocker on the branch NAME."""
        self._branch(name)[2][blocker] = None

    def branch_has_commit(self, name):
        """Return non-zero if NAME has commits.  Returns a false value
        if NAME is not a branch or if it has no commits."""
        return name in self.branches and self.branches[name][1]

    def find_excluded_symbols(self, regexp_list):
        """Returns a hash of all symbols that match the regexps in
        REGEXP_LIST.  The hash is used as a set so the values are
        not used."""
        excludes = { }
        for symbol in list(self.tags.keys()) + list(self.branches.keys()):
            if match_regexp_list(regexp_list, symbol):
                excludes[symbol] = None
        return excludes

    def find_branch_exclude_blockers(self, branch, excludes):
        """Find all blockers of BRANCH, excluding the ones in the hash
        EXCLUDES.  If BRANCH itself is not in EXCLUDES, the result is
        always empty.  The returned hash is used as a set."""
        blockers = { }
        if branch in excludes:
            for blocker in self.branches[branch][2]:
                if blocker not in excludes:
                    blockers[blocker] = None
        return blockers

    def find_blocked_excludes(self, excludes):
        """Find all branches in EXCLUDES that have blocking symbols that
        are not themselves excluded.  Return a hash that maps branch names
        to a hash of blockers.  The hash of blockers is used as a set so
        the values are not used."""
        blocked_branches = { }
        for branch in self.branches.keys():
            blockers = self.find_branch_exclude_blockers(branch, excludes)
            if blockers:
                blocked_branches[branch] = blockers
        return blocked_branches

    def find_mismatches(self, excludes=None):
        """Find all symbols that are defined as both tags and branches,
        excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
        the symbol name, tag count, branch count and commit count."""
        if excludes is None:
            excludes = { }
        mismatches = [ ]
        for branch in self.branches.keys():
            if branch not in excludes and branch in self.tags:
                mismatches.append((branch,                     # name
                                   self.tags[branch],          # tag count
                                   self.branches[branch][0],   # branch count
                                   self.branches[branch][1]))  # commit count
        return mismatches

    def read(self):
        """Read the symbol database from files.

        The tags file holds one 'name count' pair per line; the branches
        file holds 'name create_count commit_count [blocker ...]' per
        line.  Entries read are merged into SELF.tags and SELF.branches."""
        f = open(temp(TAGS_LIST))
        try:
            for line in f:
                tag, count = line.split()
                self.tags[tag] = int(count)
        finally:
            # Close explicitly instead of relying on garbage collection.
            f.close()

        f = open(temp(BRANCHES_LIST))
        try:
            for line in f:
                words = line.split()
                blockers = { }
                for blocker in words[3:]:
                    blockers[blocker] = None
                self.branches[words[0]] = [ int(words[1]), int(words[2]),
                                            blockers ]
        finally:
            f.close()

    def write(self):
        """Store the symbol database to files.

        Both files are registered with Cleanup() for pass2.  Each file is
        closed before returning so that its contents are flushed to disk."""
        f = open(temp(TAGS_LIST), "w")
        try:
            Cleanup().register(temp(TAGS_LIST), pass2)
            for tag, count in self.tags.items():
                f.write("%s %d\n" % (tag, count))
        finally:
            f.close()

        f = open(temp(BRANCHES_LIST), "w")
        try:
            Cleanup().register(temp(BRANCHES_LIST), pass2)
            for branch, info in self.branches.items():
                f.write("%s %d %d" % (branch, info[0], info[1]))
                if info[2]:
                    f.write(" ")
                    f.write(" ".join(info[2].keys()))
                f.write("\n")
        finally:
            f.close()
class CollectData(cvs2svn_rcsparse.Sink):
    """Sink for cvs2svn_rcsparse that gathers revision data from RCS files.

    For each RCS file fed through the callbacks below, one line per
    revision is written to the revs file, timestamp adjustments are
    written to the resync file, and the default-branches, metadata and
    symbol databases are updated."""

    def __init__(self):
        self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
        Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
        self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
        Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
        self.default_branches_db = Database(temp(DEFAULT_BRANCHES_DB),
                                            DB_OPEN_NEW)
        Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
        self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
        Cleanup().register(temp(METADATA_DB), pass8)
        self.fatal_errors = []
        self.num_files = 0
        self.symbol_db = SymbolDatabase()

        # 1 if we've collected data for at least one file, None otherwise.
        self.found_valid_file = None

        # See set_fname() for initializations of other variables.

    def set_fname(self, canonical_name, filename):
        """Prepare to receive data for FILENAME.  FILENAME is the absolute
        filesystem path to the file in question, and CANONICAL_NAME is
        FILENAME with the 'Attic' component removed (if the file is indeed
        in the Attic)."""
        self.fname = canonical_name

        # We calculate and save some file metadata here, where we can do
        # it only once per file, instead of waiting until later where we
        # would have to do the same calculations once per CVS *revision*.

        self.rel_name = relative_name(Ctx().cvsroot, self.fname)[:-2]

        # If the paths are not the same, then that means that the
        # canonical_name has had the 'Attic' component stripped out.
        self.file_in_attic = None
        if not canonical_name == filename:
            self.file_in_attic = 1

        file_stat = os.stat(filename)
        # The size of our file in bytes
        self.file_size = file_stat[stat.ST_SIZE]

        # Whether or not the executable bit is set.
        self.file_executable = None
        if file_stat[0] & stat.S_IXUSR:
            self.file_executable = 1

        # revision -> [timestamp, author, old-timestamp]
        self.rev_data = { }

        # Maps revision number (key) to the revision number of the
        # previous revision along this line of development.
        #
        # For the first revision R on a branch, we consider the revision
        # from which R sprouted to be the 'previous'.
        #
        # Note that this revision can't be determined arithmetically (due
        # to cvsadmin -o, which is why this is necessary).
        self.prev_rev = { }

        # This dict is essentially self.prev_rev with the values mapped in
        # the other direction, so following key -> value will yield you
        # the next revision number
        self.next_rev = { }

        # Track the state of each revision so that in set_revision_info,
        # we can determine if our op is an add/change/delete.  We can do
        # this because in set_revision_info, we'll have all of the
        # revisions for a file at our fingertips, and we need to examine
        # the state of our prev_rev to determine if we're an add or a
        # change--without the state of the prev_rev, we are unable to
        # distinguish between an add and a change.
        self.rev_state = { }

        # Hash mapping branch numbers, like '1.7.2', to branch names,
        # like 'Release_1_0_dev'.
        self.branch_names = { }

        # RCS flags (used for keyword expansion).
        self.mode = None

        # Hash mapping revision numbers, like '1.7', to lists of names
        # indicating which branches sprout from that revision, like
        # ['Release_1_0_dev', 'experimental_driver', ...].
        self.branchlist = { }

        # Like self.branchlist, but the values are lists of tag names that
        # apply to the key revision.
        self.taglist = { }

        # If set, this is an RCS branch number -- rcsparse calls this the
        # "principal branch", but CVS and RCS refer to it as the "default
        # branch", so that's what we call it, even though the rcsparse API
        # setter method is still 'set_principal_branch'.
        self.default_branch = None

        # If the RCS file doesn't have a default branch anymore, but does
        # have vendor revisions, then we make an educated guess that those
        # revisions *were* the head of the default branch up until the
        # commit of 1.2, at which point the file's default branch became
        # trunk.  This records the date at which 1.2 was committed.
        self.first_non_vendor_revision_date = None

        # A list of all symbols defined for the current file.  Used to
        # prevent multiple definitions of a symbol, something which can
        # easily happen when --symbol-transform is used.
        self.defined_symbols = [ ]

    def set_principal_branch(self, branch):
        """Record BRANCH as the default branch of the current file."""
        self.default_branch = branch

    def set_expansion(self, mode):
        """Record MODE as the RCS keyword expansion mode of the current
        file."""
        self.mode = mode

    def set_branch_name(self, branch_number, name):
        """Record that BRANCH_NUMBER is the branch number for branch NAME,
        and that NAME sprouts from BRANCH_NUMBER.
        BRANCH_NUMBER is an RCS branch number with an odd number of
        components, for example '1.7.2' (never '1.7.0.2').

        If BRANCH_NUMBER already has a name, a warning is written to
        stderr and NAME is ignored."""
        if branch_number not in self.branch_names:
            self.branch_names[branch_number] = name
            # The branchlist is keyed on the revision number from which the
            # branch sprouts, so strip off the odd final component.
            sprout_rev = branch_number[:branch_number.rfind(".")]
            if sprout_rev not in self.branchlist:
                self.branchlist[sprout_rev] = []
            self.branchlist[sprout_rev].append(name)
            self.symbol_db.register_branch_creation(name)
        else:
            sys.stderr.write("%s: in '%s':\n"
                             " branch '%s' already has name '%s',\n"
                             " cannot also have name '%s', ignoring the latter\n"
                             % (warning_prefix, self.fname, branch_number,
                                self.branch_names[branch_number], name))

    def rev_to_branch_name(self, revision):
        """Return the name of the branch on which REVISION lies.
        REVISION is a non-branch revision number with an even number of
        components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
        For the convenience of callers, REVISION can also be a trunk
        revision such as '1.2', in which case just return None."""
        if trunk_rev.match(revision):
            return None
        return self.branch_names.get(revision[:revision.rindex(".")])

    def add_cvs_branch(self, revision, branch_name):
        """Record the root revision and branch revision for BRANCH_NAME,
        based on REVISION.  REVISION is a CVS branch number having an even
        number of components where the second-to-last is '0'.  For
        example, if it's '1.7.0.2', then record that BRANCH_NAME sprouts
        from 1.7 and has branch number 1.7.2."""
        last_dot = revision.rfind(".")
        branch_rev = revision[:last_dot]
        last2_dot = branch_rev.rfind(".")
        # Drop the '0' component: '1.7.0.2' -> '1.7' + '.2'.
        branch_rev = branch_rev[:last2_dot] + revision[last_dot:]
        self.set_branch_name(branch_rev, branch_name)

    def define_tag(self, name, revision):
        """Record a bidirectional mapping between symbolic NAME and REVISION.
        REVISION is an unprocessed revision number from the RCS file's
        header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
        This function will determine what kind of symbolic name it is by
        inspection, and record it in the right places.

        NAME is first run through the context's symbol transforms.  A
        name defined more than once for the current file is reported on
        stderr and recorded in SELF.fatal_errors, but is still processed."""
        for (pattern, replacement) in Ctx().symbol_transforms:
            newname = re.sub(pattern, replacement, name)
            if newname != name:
                Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
                            % (name, newname))
                name = newname
        if name in self.defined_symbols:
            err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
                  % (error_prefix, name, self.fname)
            sys.stderr.write(err + "\n")
            self.fatal_errors.append(err)
        self.defined_symbols.append(name)
        if branch_tag.match(revision):
            self.add_cvs_branch(revision, name)
        elif vendor_tag.match(revision):
            self.set_branch_name(revision, name)
        else:
            if revision not in self.taglist:
                self.taglist[revision] = []
            self.taglist[revision].append(name)
            self.symbol_db.register_tag_creation(name)

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Record the data of REVISION: its TIMESTAMP, AUTHOR and STATE,
        its links to neighbouring revisions (derived from NEXT and
        BRANCHES), its effect on the default-branches database, and, for
        non-trunk revisions, a commit on its branch."""

        # Record the state of our revision for later calculations
        self.rev_state[revision] = state

        # store the rev_data as a list in case we have to jigger the timestamp
        self.rev_data[revision] = [int(timestamp), author, None]

        # When on trunk, the RCS 'next' revision number points to what
        # humans might consider to be the 'previous' revision number.  For
        # example, 1.3's RCS 'next' is 1.2.
        #
        # However, on a branch, the RCS 'next' revision number really does
        # point to what humans would consider to be the 'next' revision
        # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
        #
        # In other words, in RCS, 'next' always means "where to find the
        # next deltatext that you need this revision to retrieve".
        #
        # That said, we don't *want* RCS's behavior here, so we determine
        # whether we're on trunk or a branch and set self.prev_rev
        # accordingly.
        #
        # One last thing.  Note that if REVISION is a branch revision,
        # instead of mapping REVISION to NEXT, we instead map NEXT to
        # REVISION.  Since we loop over all revisions in the file before
        # doing anything with the data we gather here, this 'reverse
        # assignment' effectively does the following:
        #
        # 1. Gives us no 'prev' value for REVISION (in this
        #    iteration... it may have been set in a previous iteration)
        #
        # 2. Sets the 'prev' value for the revision with number NEXT to
        #    REVISION.  So when we come around to the branch revision whose
        #    revision value is NEXT, its 'prev' and 'prev_rev' are already
        #    set.
        if trunk_rev.match(revision):
            self.prev_rev[revision] = next
            self.next_rev[next] = revision
        elif next:
            self.prev_rev[next] = revision
            self.next_rev[revision] = next

        for b in branches:
            self.prev_rev[b] = revision

        # Ratchet up the highest vendor head revision, if necessary.
        if self.default_branch:
            default_branch_root = self.default_branch + "."
            if ((revision.find(default_branch_root) == 0)
                and (default_branch_root.count('.') == revision.count('.'))):
                # This revision is on the default branch, so record that it
                # is the new highest default branch head revision.
                self.default_branches_db[self.rel_name] = revision
        else:
            # No default branch, so make an educated guess.
            if revision == '1.2':
                # This is probably the time when the file stopped having a
                # default branch, so make a note of it.
                self.first_non_vendor_revision_date = timestamp
            else:
                m = vendor_revision.match(revision)
                if m and ((not self.first_non_vendor_revision_date)
                          or (timestamp
                              < self.first_non_vendor_revision_date)):
                    # We're looking at a vendor revision, and it wasn't
                    # committed after this file lost its default branch, so
                    # bump the maximum trunk vendor revision in the
                    # permanent record.
                    self.default_branches_db[self.rel_name] = revision

        if not trunk_rev.match(revision):
            # Check for unlabeled branches, record them.  We tried to
            # collect all branch names when we parsed the symbolic name
            # header earlier, of course, but that didn't catch unlabeled
            # branches.  If a branch is unlabeled, this is our first
            # encounter with it, so we have to record its data now.
            branch_number = revision[:revision.rindex(".")]
            if branch_number not in self.branch_names:
                branch_name = "unlabeled-" + branch_number
                self.set_branch_name(branch_number, branch_name)

            # Register the commit on this non-trunk branch
            branch_name = self.branch_names[branch_number]
            self.symbol_db.register_branch_commit(branch_name)

    def tree_completed(self):
        "The revision tree has been parsed.  Analyze it for consistency."

        # Our algorithm depends upon the timestamps on the revisions
        # occurring monotonically over time.  That is, we want to see rev
        # 1.34 occur in time before rev 1.35.  If we inserted 1.35 *first*
        # (due to the time-sorting), and then tried to insert 1.34, we'd
        # be screwed.

        # to perform the analysis, we'll simply visit all of the 'previous'
        # links that we have recorded and validate that the timestamp on the
        # previous revision is before the specified revision

        # if we have to resync some nodes, then we restart the scan.  just
        # keep looping as long as we need to restart.
        while 1:
            for current, prev in self.prev_rev.items():
                if not prev:
                    # no previous revision exists (i.e. the initial revision)
                    continue
                t_c = self.rev_data[current][0]
                t_p = self.rev_data[prev][0]
                if t_p >= t_c:
                    # the previous revision occurred later than the current
                    # revision.  shove the previous revision back in time
                    # (and any before it that may need to shift).

                    # We sync backwards and not forwards because any given
                    # CVS Revision has only one previous revision.  However,
                    # a CVS Revision can *be* a previous revision for many
                    # other revisions (e.g., a revision that is the source
                    # of multiple branches).  This becomes relevant when we
                    # do the secondary synchronization in pass 2--we can
                    # make certain that we don't resync a revision earlier
                    # than its previous revision, but it would be
                    # non-trivial to make sure that we don't resync revision
                    # R *after* any revisions that have R as a previous
                    # revision.
                    while t_p >= t_c:
                        self.rev_data[prev][0] = t_c - 1  # new timestamp
                        self.rev_data[prev][2] = t_p      # old timestamp
                        delta = t_c - 1 - t_p
                        msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
                              % (self.rel_name,
                                 prev, time.ctime(t_p), delta)
                        Log().write(LOG_VERBOSE, msg)
                        if (delta > COMMIT_THRESHOLD
                            or delta < (COMMIT_THRESHOLD * -1)):
                            # Named so as not to shadow the builtin str().
                            warning_fmt = ("%s: Significant timestamp change "
                                           "for '%s' (%d seconds)")
                            Log().write(LOG_WARN,
                                        warning_fmt % (warning_prefix,
                                                       self.rel_name, delta))
                        current = prev
                        prev = self.prev_rev[current]
                        if not prev:
                            break
                        t_c = t_c - 1  # self.rev_data[current][0]
                        t_p = self.rev_data[prev][0]

                    # break from the for-loop
                    break
            else:
                # finished the for-loop (no resyncing was performed)
                return

    def set_revision_info(self, revision, log, text):
        """Write the revs-file line for REVISION and record its metadata.

        LOG is the revision's log message and TEXT its deltatext.  If the
        revision's timestamp was changed by tree_completed(), a line is
        also written to the resync file."""
        timestamp, author, old_ts = self.rev_data[revision]
        digest = sha.new(log + '\0' + author).hexdigest()
        if old_ts:
            # the timestamp on this revision was changed. log it for later
            # resynchronization of other files's revisions that occurred
            # for this time and log message.
            self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

        # "...Give back one kadam to honor the Hebrew God whose Ark this is."
        #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
        #
        # If revision 1.1 appears to have been created via 'cvs add'
        # instead of 'cvs import', then this file probably never had a
        # default branch, so retroactively remove its record in the
        # default branches db.  The test is that the log message CVS uses
        # for 1.1 in imports is "Initial revision\n" with no period.
        if revision == '1.1' and log != 'Initial revision\n':
            if self.default_branches_db.has_key(self.rel_name):
                del self.default_branches_db[self.rel_name]

        # Get the previous revision (None if there is none on record) and
        # its timestamp (0 if there is no previous revision).
        prev_rev = self.prev_rev.get(revision, None)
        prev_timestamp = self.rev_data.get(prev_rev, [0, None, None])[0]

        # How to tell if a CVSRevision is an add, a change, or a deletion:
        #
        # It's a delete if RCS state is 'dead'
        #
        # It's an add if RCS state is 'Exp.' and
        #  - we either have no previous revision
        #    or
        #  - we have a previous revision whose state is 'dead'
        #
        # Anything else is a change.
        if self.rev_state[revision] == 'dead':
            op = OP_DELETE
        elif prev_rev is None or self.rev_state[prev_rev] == 'dead':
            op = OP_ADD
        else:
            op = OP_CHANGE

        if text:
            deltatext_code = DELTATEXT_NONEMPTY
        else:
            deltatext_code = DELTATEXT_EMPTY

        # Pass the prev_rev looked up above rather than indexing
        # self.prev_rev again: a revision with no recorded predecessor is
        # treated as "no previous revision" everywhere in this method
        # instead of raising KeyError here.
        c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp, op,
                            prev_rev, revision,
                            self.next_rev.get(revision),
                            self.file_in_attic, self.file_executable,
                            self.file_size,
                            deltatext_code, self.fname,
                            self.mode, self.rev_to_branch_name(revision),
                            self.taglist.get(revision, []),
                            self.branchlist.get(revision, []))
        self.revs.write(str(c_rev) + "\n")
        StatsKeeper().record_c_rev(c_rev)

        if not self.metadata_db.has_key(digest):
            self.metadata_db[digest] = (author, log)

    def parse_completed(self):
        """Register the current file's symbols as blockers of their parent
        branches, and count the file as processed."""
        # Walk through all branches and tags and register them with
        # their parent branch in the symbol database.
        symbol_items = list(self.taglist.items()) + list(self.branchlist.items())
        for revision, symbols in symbol_items:
            for symbol in symbols:
                name = self.rev_to_branch_name(revision)
                if name is not None:
                    self.symbol_db.register_branch_blocker(name, symbol)

        self.num_files = self.num_files + 1

    def write_symbol_db(self):
        """Store the collected symbol database to its files."""
        self.symbol_db.write()
1548 class SymbolingsLogger:
1549 """Manage the file that contains lines for symbol openings and
1550 closings.
1552 This data will later be used to determine valid SVNRevision ranges
1553 from which a file can be copied when creating a branch or tag in
1554 Subversion. Do this by finding "Openings" and "Closings" for each
1555 file copied onto a branch or tag.
1557 An "Opening" is the CVSRevision from which a given branch/tag
1558 sprouts on a path.
1560 The "Closing" for that branch/tag and path is the next CVSRevision
1561 on the same line of development as the opening.
1563 For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
1564 obviously sprouts from revision 1.2. Therefore, 1.2 is the opening
1565 for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
1566 'foo.c'. Note that there may be many revisions chronologically
1567 between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
1568 perhaps even including on branch BEE itself. But 1.3 is the next
1569 revision *on the same line* as 1.2, that is why it is the closing
1570 revision for those symbolic names of which 1.2 is the opening.
1572 The reason for doing all this hullabaloo is to make branch and tag
1573 creation as efficient as possible by minimizing the number of copies
1574 and deletes per creation. For example, revisions 1.2 and 1.3 of
1575 foo.c might correspond to revisions 17 and 30 in Subversion. That
1576 means that when creating branch BEE, there is some motivation to do
1577 the copy from one of 17-30. Now if there were another file,
1578 'bar.c', whose opening and closing CVSRevisions for BEE corresponded
1579 to revisions 24 and 39 in Subversion, we would know that the ideal
1580 thing would be to copy the branch from somewhere between 24 and 29,
1581 inclusive.
1583 def __init__(self):
1584 self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
1585 Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
1586 self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
1587 Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)
1589 # This keys of this dictionary are Subversion repository *source*
1590 # paths for which we've encountered an 'opening'. The values are
1591 # the symbolic names that this path has opened. The only paths
1592 # that should be in this dict are paths whose corresponding
1593 # CVSRevision is a default branch revision.
1594 self.open_paths_with_default_branches = { }
1596 def log_revision(self, c_rev, svn_revnum):
1597 """Log any openings found in C_REV, and if C_REV.next_rev is not
1598 None, a closing. The opening uses SVN_REVNUM, but the closing (if
1599 any) will have its revnum determined later."""
1600 for name in c_rev.tags + c_rev.branches:
1601 name = _clean_symbolic_name(name)
1602 self._note_default_branch_opening(c_rev, name)
1603 if c_rev.op != OP_DELETE:
1604 self._log(name, svn_revnum, c_rev.svn_path, OPENING)
1606 # If our c_rev has a next_rev, then that's the closing rev for
1607 # this source revision. Log it to closings for later processing
1608 # since we don't know the svn_revnum yet.
1609 if c_rev.next_rev is not None:
1610 self.closings.write('%s %s\n' %
1611 (name, c_rev.unique_key(c_rev.next_rev)))
1613 def _log(self, name, svn_revnum, svn_path, type):
1614 """Write out a single line to the symbol_openings_closings file
1615 representing that svn_revnum of svn_path is either the opening or
1616 closing (TYPE) of NAME (a symbolic name).
1618 TYPE should only be one of the following global constants:
1619 OPENING or CLOSING."""
1620 # 8 places gives us 999,999,999 SVN revs. That *should* be enough.
1621 self.symbolings.write('%s %.8d %s %s\n' % (name, svn_revnum,
1622 type, svn_path))
1624 def close(self):
1625 """Iterate through the closings file, lookup the svn_revnum for
1626 each closing CVSRevision, and write a proper line out to the
1627 symbolings file."""
1628 # Use this to get the c_rev.svn_path of our rev_key
1629 cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)
1631 self.closings.close()
1632 for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
1633 (name, rev_key) = line.rstrip().split(" ", 1)
1634 svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)
1636 c_rev = cvs_revs_db.get_revision(rev_key)
1637 self._log(name, svn_revnum, c_rev.svn_path, CLOSING)
1639 self.symbolings.close()
1641 def _note_default_branch_opening(self, c_rev, symbolic_name):
1642 """If C_REV is a default branch revision, log C_REV.svn_trunk_path
1643 as an opening for SYMBOLIC_NAME."""
1644 path = c_rev.svn_trunk_path
1645 if not self.open_paths_with_default_branches.has_key(path):
1646 self.open_paths_with_default_branches[path] = [ ]
1647 self.open_paths_with_default_branches[path].append(symbolic_name)
def log_default_branch_closing(self, c_rev, svn_revnum):
  """Close every symbol still open on C_REV.svn_trunk_path.

  If self.open_paths_with_default_branches has an entry for that
  path, log each name stored there as a closing at SVN_REVNUM and
  then drop the entry.  Otherwise do nothing."""
  trunk_path = c_rev.svn_trunk_path
  still_open = self.open_paths_with_default_branches
  if not still_open.has_key(trunk_path):
    return

  for symbol in still_open[trunk_path]:
    self._log(symbol, svn_revnum, trunk_path, CLOSING)

  # These symbols are finished with, so forget the path.
  del still_open[trunk_path]
class PersistenceManager:
  """Keep SVNCommit data on disk and hand it back on demand.

  SVNCommits are stored in a series of on-disk databases and can be
  retrieved later using only their Subversion revision number as the
  key.  The manager can also return the Subversion revision number
  recorded for a given CVSRevision's unique key.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode the databases are opened with DB_OPEN_NEW and every
  method is usable.
  In 'read' mode the databases are opened with DB_OPEN_READ and the
  set_* methods raise RuntimeError."""

  def __init__(self, mode):
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

    self.svn2cvs_db = self._open_scratch_db(SVN_REVNUMS_TO_CVS_REVS)
    self.cvs2svn_db = self._open_scratch_db(CVS_REVS_TO_SVN_REVNUMS)
    self.svn_commit_names_dates = \
        self._open_scratch_db(SVN_COMMIT_NAMES_DATES)

    # These two are only ever read here, whatever MODE is.
    self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
    self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)

    ###PERF kff Elsewhere there are comments about sucking the tags db
    ### into memory.  That seems like a good idea.
    if not Ctx().trunk_only:
      self.tags_db = TagsDatabase(DB_OPEN_READ)
    self.motivating_revnums = self._open_scratch_db(MOTIVATING_REVNUMS)

    # "branch_name" -> svn_revnum in which branch was last filled.
    # This is used by CVSCommit._pre_commit, to prevent creating a fill
    # revision which would have nothing to do.
    self.last_filled = {}

  def _open_scratch_db(self, filename):
    """Open the temp database FILENAME in self.mode and register the
    file with Cleanup() under pass8.  Return the Database."""
    database = Database(temp(filename), self.mode)
    Cleanup().register(temp(filename), pass8)
    return database

  def _assert_writable(self):
    """Raise RuntimeError if this manager was opened read-only."""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager')

  def get_svn_revnum(self, cvs_rev_unique_key):
    """Return, as an int, the Subversion revision number stored for
    CVS_REV_UNIQUE_KEY, or SVN_INVALID_REVNUM if nothing is stored
    for that key."""
    stored = self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM)
    return int(stored)

  def get_svn_commit(self, svn_revnum):
    """Rebuild and return the SVNCommit stored for SVN_REVNUM.

    Return None if no CVS revision keys are stored for SVN_REVNUM.

    Raise SVNCommit.SVNCommitInternalInconsistencyError if the stored
    data describes a commit that has both CVSRevisions and a symbolic
    name to fill."""
    svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
    c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
    if c_rev_keys is None:
      return None

    # The author and log message come from the metadata of the first
    # CVSRevision only; later revisions are just added to the commit.
    digest = None
    for key in c_rev_keys:
      c_rev = self.cvs_revisions.get_revision(key)
      svn_commit.add_revision(c_rev)
      if digest is not None:
        continue
      digest = c_rev.digest
      author, log_msg = self.svn_commit_metadata[digest]
      svn_commit.set_author(author)
      svn_commit.set_log_msg(log_msg)

    # A trunk-only conversion has no symbols or default branches to
    # restore, so the commit is complete at this point.
    if Ctx().trunk_only:
      return svn_commit

    name, date = self._get_name_and_date(svn_revnum)
    if name:
      svn_commit.set_symbolic_name(name)
      svn_commit.set_date(date)
      if self.tags_db.has_key(name):
        svn_commit.is_tag = 1

    motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
    if motivating_revnum:
      svn_commit.set_motivating_revnum(int(motivating_revnum))
      svn_commit.set_date(date)

    if len(svn_commit.cvs_revs) and name:
      msg = ("An SVNCommit cannot have cvs_revisions *and* a\n"
             "corresponding symbolic name ('%s') to fill." % name)
      raise SVNCommit.SVNCommitInternalInconsistencyError(msg)

    return svn_commit

  def set_cvs_revs(self, svn_revnum, cvs_revs):
    """Store the mapping between SVN_REVNUM and CVS_REVS in both
    directions.  Raise RuntimeError in 'read' mode."""
    self._assert_writable()
    for c_rev in cvs_revs:
      Log().write(LOG_VERBOSE, " ", c_rev.unique_key())

    self.svn2cvs_db[str(svn_revnum)] = [x.unique_key() for x in cvs_revs]
    for c_rev in cvs_revs:
      self.cvs2svn_db[c_rev.unique_key()] = svn_revnum

  def set_name_and_date(self, svn_revnum, name, date):
    """Store symbolic name NAME and DATE for SVN_REVNUM, and note
    SVN_REVNUM as the revision in which NAME was last filled.  Raise
    RuntimeError in 'read' mode."""
    self._assert_writable()
    self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
    self.last_filled[name] = svn_revnum

  def _get_name_and_date(self, svn_revnum):
    """Return the (symbolic name, date) pair stored for SVN_REVNUM, or
    (None, None) if nothing is stored for it."""
    return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))

  def set_motivating_revnum(self, svn_revnum, motivating_revnum):
    """Store MOTIVATING_REVNUM, as a string, under SVN_REVNUM.  Raise
    RuntimeError in 'read' mode."""
    self._assert_writable()
    self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
1788 class CVSCommit:
1789 """Each instance of this class contains a number of CVS Revisions
1790 that correspond to one or more Subversion Commits. After all CVS
1791 Revisions are added to the grouping, calling process_revisions will
1792 generate a Subversion Commit (or Commits) for the set of CVS
1793 Revisions in the grouping."""
1795 def __init__(self, digest, author, log):
1796 self.digest = digest
1797 self.author = author
1798 self.log = log
1800 # Symbolic names for which the last source revision has already
1801 # been seen and for which the CVSRevisionAggregator has already
1802 # generated a fill SVNCommit. See self.process_revisions().
1803 self.done_symbols = [ ]
1805 self.files = { }
1806 # Lists of CVSRevisions
1807 self.changes = [ ]
1808 self.deletes = [ ]
1810 # Start out with a t_min higher than any incoming time T, and a
1811 # t_max lower than any incoming T. This way the first T will
1812 # push t_min down to T, and t_max up to T, naturally (without any
1813 # special-casing), and successive times will then ratchet them
1814 # outward as appropriate.
1815 self.t_min = 1L<<32
1816 self.t_max = 0
1818 # This will be set to the SVNCommit that occurs in self._commit.
1819 self.motivating_commit = None
1821 # This is a list of all non-primary commits motivated by the main
1822 # commit. We gather these so that we can set their dates to the
1823 # same date as the primary commit.
1824 self.secondary_commits = [ ]
1826 # State for handling default branches.
1828 # Here is a tempting, but ultimately nugatory, bit of logic, which
1829 # I share with you so you may appreciate the less attractive, but
1830 # refreshingly non-nugatory, logic which follows it:
1832 # If some of the commits in this txn happened on a non-trunk
1833 # default branch, then those files will have to be copied into
1834 # trunk manually after being changed on the branch (because the
1835 # RCS "default branch" appears as head, i.e., trunk, in practice).
1836 # As long as those copies don't overwrite any trunk paths that
1837 # were also changed in this commit, then we can do the copies in
1838 # the same revision, because they won't cover changes that don't
1839 # appear anywhere/anywhen else. However, if some of the trunk dst
1840 # paths *did* change in this commit, then immediately copying the
1841 # branch changes would lose those trunk mods forever. So in this
1842 # case, we need to do at least that copy in its own revision. And
1843 # for simplicity's sake, if we're creating the new revision for
1844 # even one file, then we just do all such copies together in the
1845 # new revision.
1847 # Doesn't that sound nice?
1849 # Unfortunately, Subversion doesn't support copies with sources
1850 # in the current txn. All copies must be based in committed
1851 # revisions. Therefore, we generate the above-described new
1852 # revision unconditionally.
1854 # This is a list of c_revs, and a c_rev is appended for each
1855 # default branch commit that will need to be copied to trunk (or
1856 # deleted from trunk) in some generated revision following the
1857 # "regular" revision.
1858 self.default_branch_cvs_revisions = [ ]
1860 def __cmp__(self, other):
1861 # Commits should be sorted by t_max. If both self and other have
1862 # the same t_max, break the tie using t_min, and lastly, digest
1863 return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
1864 or cmp(self.digest, other.digest))
1866 def has_file(self, fname):
1867 return self.files.has_key(fname)
1869 def revisions(self):
1870 return self.changes + self.deletes
1872 def opens_symbolic_name(self, name):
1873 """Returns true if any CVSRevision in this commit is on a tag or a
1874 branch or is the origin of a tag or branch."""
1875 for c_rev in self.revisions():
1876 if c_rev.opens_symbolic_name(name):
1877 return 1
1878 return 0
1880 def add_revision(self, c_rev):
1881 # Record the time range of this commit.
1883 # ### ISSUE: It's possible, though unlikely, that the time range
1884 # of a commit could get gradually expanded to be arbitrarily
1885 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
1886 # problem, and anyway deciding where to break it up would be a
1887 # judgement call. For now, we just print a warning in commit() if
1888 # this happens.
1889 if c_rev.timestamp < self.t_min:
1890 self.t_min = c_rev.timestamp
1891 if c_rev.timestamp > self.t_max:
1892 self.t_max = c_rev.timestamp
1894 if c_rev.op == OP_DELETE:
1895 self.deletes.append(c_rev)
1896 else:
1897 # OP_CHANGE or OP_ADD
1898 self.changes.append(c_rev)
1900 self.files[c_rev.fname] = 1
1902 def _pre_commit(self):
1903 """Generates any SVNCommits that must exist before the main
1904 commit."""
1906 # There may be multiple c_revs in this commit that would cause
1907 # branch B to be filled, but we only want to fill B once. On the
1908 # other hand, there might be multiple branches committed on in
1909 # this commit. Whatever the case, we should count exactly one
1910 # commit per branch, because we only fill a branch once per
1911 # CVSCommit. This list tracks which branches we've already
1912 # counted.
1913 accounted_for_sym_names = [ ]
1915 def fill_needed(c_rev, pm):
1916 """Return 1 if this is the first commit on a new branch (for
1917 this file) and we need to fill the branch; else return 0
1918 (meaning that some other file's first commit on the branch has
1919 already done the fill for us).
1921 If C_REV.op is OP_ADD, only return 1 if the branch that this
1922 commit is on has no last filled revision.
1924 PM is a PersistenceManager to query.
1927 # Different '.' counts indicate that c_rev is now on a different
1928 # line of development (and may need a fill)
1929 if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
1930 svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
1931 # It should be the case that when we have a file F that
1932 # is added on branch B (thus, F on trunk is in state
1933 # 'dead'), we generate an SVNCommit to fill B iff the branch
1934 # has never been filled before.
1936 # If this c_rev.op == OP_ADD, *and* the branch has never
1937 # been filled before, then fill it now. Otherwise, no need to
1938 # fill it.
1939 if c_rev.op == OP_ADD:
1940 if pm.last_filled.get(c_rev.branch_name, None) is None:
1941 return 1
1942 else:
1943 if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
1944 return 1
1945 return 0
1947 for c_rev in self.changes + self.deletes:
1948 # If a commit is on a branch, we must ensure that the branch
1949 # path being committed exists (in HEAD of the Subversion
1950 # repository). If it doesn't exist, we will need to fill the
1951 # branch. After the fill, the path on which we're committing
1952 # will exist.
1953 if c_rev.branch_name \
1954 and c_rev.branch_name not in accounted_for_sym_names \
1955 and c_rev.branch_name not in self.done_symbols \
1956 and fill_needed(c_rev, Ctx()._persistence_manager):
1957 svn_commit = SVNCommit("pre-commit symbolic name '%s'"
1958 % c_rev.branch_name)
1959 svn_commit.set_symbolic_name(c_rev.branch_name)
1960 self.secondary_commits.append(svn_commit)
1961 accounted_for_sym_names.append(c_rev.branch_name)
1963 def _commit(self):
1964 """Generates the primary SVNCommit that corresponds the this
1965 CVSCommit."""
1966 # Generate an SVNCommit unconditionally. Even if the only change
1967 # in this CVSCommit is a deletion of an already-deleted file (that
1968 # is, a CVS revision in state 'dead' whose predecessor was also in
1969 # state 'dead'), the conversion will still generate a Subversion
1970 # revision containing the log message for the second dead
1971 # revision, because we don't want to lose that information.
1972 svn_commit = SVNCommit("commit")
1973 self.motivating_commit = svn_commit
1975 for c_rev in self.changes:
1976 svn_commit.add_revision(c_rev)
1977 # Only make a change if we need to. When 1.1.1.1 has an empty
1978 # deltatext, the explanation is almost always that we're looking
1979 # at an imported file whose 1.1 and 1.1.1.1 are identical. On
1980 # such imports, CVS creates an RCS file where 1.1 has the
1981 # content, and 1.1.1.1 has an empty deltatext, i.e, the same
1982 # content as 1.1. There's no reason to reflect this non-change
1983 # in the repository, so we want to do nothing in this case. (If
1984 # we were really paranoid, we could make sure 1.1's log message
1985 # is the CVS-generated "Initial revision\n", but I think the
1986 # conditions below are strict enough.)
1987 if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
1988 and (c_rev.rev == "1.1.1.1")):
1989 if c_rev.is_default_branch_revision():
1990 self.default_branch_cvs_revisions.append(c_rev)
1992 for c_rev in self.deletes:
1993 # When a file is added on a branch, CVS not only adds the file
1994 # on the branch, but generates a trunk revision (typically
1995 # 1.1) for that file in state 'dead'. We only want to add
1996 # this revision if the log message is not the standard cvs
1997 # fabricated log message.
1998 if c_rev.prev_rev is None:
1999 # c_rev.branches may be empty if the originating branch
2000 # has been excluded.
2001 if not c_rev.branches:
2002 continue
2003 cvs_generated_msg = ('file %s was initially added on branch %s.\n'
2004 % (c_rev.filename(),
2005 c_rev.branches[0]))
2006 author, log_msg = \
2007 Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
2008 if log_msg == cvs_generated_msg:
2009 continue
2011 svn_commit.add_revision(c_rev)
2012 if c_rev.is_default_branch_revision():
2013 self.default_branch_cvs_revisions.append(c_rev)
2015 # There is a slight chance that we didn't actually register any
2016 # CVSRevisions with our SVNCommit (see loop over self.deletes
2017 # above), so if we have no CVSRevisions, we don't flush the
2018 # svn_commit to disk and roll back our revnum.
2019 if len(svn_commit.cvs_revs) > 0:
2020 svn_commit.flush()
2021 else:
2022 # We will not be flushing this SVNCommit, so rollback the
2023 # SVNCommit revision counter.
2024 SVNCommit.revnum = SVNCommit.revnum - 1
2026 if not Ctx().trunk_only:
2027 for c_rev in self.revisions():
2028 Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
2030 def _post_commit(self):
2031 """Generates any SVNCommits that we can perform now that _commit
2032 has happened. That is, handle non-trunk default branches.
2033 Sometimes an RCS file has a non-trunk default branch, so a commit
2034 on that default branch would be visible in a default CVS checkout
2035 of HEAD. If we don't copy that commit over to Subversion's trunk,
2036 then there will be no Subversion tree which corresponds to that
2037 CVS checkout. Of course, in order to copy the path over, we may
2038 first need to delete the existing trunk there. """
2040 # Only generate a commit if we have default branch revs
2041 if len(self.default_branch_cvs_revisions):
2042 # Generate an SVNCommit for all of our default branch c_revs.
2043 svn_commit = SVNCommit("post-commit default branch(es)")
2044 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
2045 for c_rev in self.default_branch_cvs_revisions:
2046 svn_commit.add_revision(c_rev)
2047 Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
2048 svn_commit.revnum)
2049 self.secondary_commits.append(svn_commit)
2051 def process_revisions(self, done_symbols):
2052 """Process all the CVSRevisions that this instance has, creating
2053 one or more SVNCommits in the process. Generate fill SVNCommits
2054 only for symbols not in DONE_SYMBOLS (avoids unnecessary
2055 fills).
2057 Return the primary SVNCommit that corresponds to this CVSCommit.
2058 The returned SVNCommit is the commit that motivated any other
2059 SVNCommits generated in this CVSCommit."""
2060 self.done_symbols = done_symbols
2061 seconds = self.t_max - self.t_min + 1
2063 Log().write(LOG_VERBOSE, '-' * 60)
2064 Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
2065 if seconds == 1:
2066 Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
2067 % time.ctime(self.t_max))
2068 else:
2069 Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
2070 Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
2071 % (time.ctime(self.t_max), seconds))
2073 if seconds > COMMIT_THRESHOLD + 1:
2074 Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
2075 % (warning_prefix, COMMIT_THRESHOLD))
2077 if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
2078 self._commit()
2079 return self.motivating_commit
2081 self._pre_commit()
2082 self._commit()
2083 self._post_commit()
2085 for svn_commit in self.secondary_commits:
2086 svn_commit.set_date(self.motivating_commit.get_date())
2087 svn_commit.flush()
2089 return self.motivating_commit
class SVNCommit:
  """One commit to the Subversion Repository.

  There are three types of SVNCommits:

    1. Commits one or more CVSRevisions (cannot fill a symbolic name).

    2. Creates or fills a symbolic name (cannot commit CVSRevisions).

    3. Updates trunk to reflect the contents of a particular branch
       (this is to handle RCS default branches)."""

  # The revision number to assign to the next new SVNCommit.
  # We start at 2 because SVNRepositoryMirror uses the first commit
  # to create trunk, tags, and branches.
  revnum = 2

  class SVNCommitInternalInconsistencyError(Exception):
    """Exception raised if we encounter an impossible state in the
    SVNCommit Databases."""
    pass

  def __init__(self, description="", revnum=None, cvs_revs=None):
    """Instantiate an SVNCommit.  DESCRIPTION is for debugging only.

    If REVNUM is given, the SVNCommit corresponds to that revision
    number; otherwise it takes the next number from the class-wide
    counter SVNCommit.revnum, which is then advanced.  If CVS_REVS is
    given, it must be the exact set of CVSRevisions for REVNUM.

    It is an error to pass CVS_REVS without REVNUM, but you may pass
    REVNUM without CVS_REVS, and then add a revision at a time by
    invoking add_revision()."""
    self._description = description

    # Revprop metadata for this commit.
    #
    # These initial values are placeholders.  At least the log and the
    # date should be different by the time these are used.
    #
    # They are private because their values should be returned encoded
    # in UTF8, but callers aren't required to set them in UTF8.
    # Therefore, accessor methods are used to set them, and
    # self.get_revprops() is used to get them, in dictionary form.
    self._author = Ctx().username
    self._log_msg = "This log message means an SVNCommit was used too soon."
    self._max_date = 0 # Latest date seen so far.

    self.cvs_revs = cvs_revs or []
    if not revnum:
      self.revnum = SVNCommit.revnum
      SVNCommit.revnum = SVNCommit.revnum + 1
    else:
      self.revnum = revnum

    # The symbolic name that is filled in this SVNCommit, if any
    self.symbolic_name = None

    # If this commit is a default branch synchronization, this
    # variable represents the subversion revision number of the
    # *primary* commit where the default branch changes actually
    # happened.  It is None otherwise.
    #
    # It is possible for multiple synchronization commits to refer to
    # the same motivating commit revision number, and it is possible
    # for a single synchronization commit to contain CVSRevisions on
    # multiple different default branches.
    self.motivating_revnum = None

    # is_tag is true only if this commit is a fill of a symbolic name
    # that is a tag, None in all other cases.
    self.is_tag = None

  def set_symbolic_name(self, name):
    "Set self.symbolic_name to the cleaned form of NAME."
    self.symbolic_name = _clean_symbolic_name(name)

  def set_motivating_revnum(self, revnum):
    "Set self.motivating_revnum to REVNUM."
    self.motivating_revnum = revnum

  def set_author(self, author):
    """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
    This is the only way to set an SVNCommit's author."""
    self._author = author

  def set_log_msg(self, msg):
    """Set this SVNCommit's log message to MSG (a locally-encoded string).
    This is the only way to set an SVNCommit's log message."""
    self._log_msg = msg

  def set_date(self, date):
    """Set this SVNCommit's date to DATE (an integer).

    self.add_revision() raises the date automatically whenever it is
    given a CVSRevision with a later timestamp, so calling this may
    not be needed at all, and the value set here may be overwritten
    by a later call to self.add_revision()."""
    self._max_date = date

  def get_date(self):
    """Return this SVNCommit's date as an integer."""
    return self._max_date

  def get_revprops(self):
    """Return the Subversion revprops for this SVNCommit as a dict
    with the keys 'svn:author', 'svn:log' and 'svn:date'.

    The author and log message are decoded with Ctx().encoding and
    re-encoded as UTF8.  If that raises UnicodeError, warnings are
    logged and the undecoded values are returned instead."""
    date = format_date(self._max_date)
    try:
      ### FIXME: The 'replace' behavior should be an option, like
      ### --encoding is.
      if self._author is None:
        utf8_author = None
      else:
        utf8_author = unicode(self._author, Ctx().encoding,
                              'replace').encode('utf8')
      utf8_log = unicode(self.get_log_msg(), Ctx().encoding,
                         'replace').encode('utf8')
      return { 'svn:author' : utf8_author,
               'svn:log' : utf8_log,
               'svn:date' : date }
    except UnicodeError:
      warn = Log().write
      warn(LOG_WARN, '%s: problem encoding author or log message:'
           % warning_prefix)
      warn(LOG_WARN, " author: '%s'" % self._author)
      warn(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
      warn(LOG_WARN, " date: '%s'" % date)
      warn(LOG_WARN, "(subversion rev %s) Related files:" % self.revnum)
      for c_rev in self.cvs_revs:
        warn(LOG_WARN, " ", c_rev.fname)

      warn(LOG_WARN, "Consider rerunning with (for example)",
           "'--encoding=latin1'.\n")
      # It's better to fall back to the original (unknown encoding) data
      # than to either 1) quit or 2) record nothing at all.
      return { 'svn:author' : self._author,
               'svn:log' : self.get_log_msg(),
               'svn:date' : date }

  def add_revision(self, cvs_rev):
    """Append CVS_REV to this commit, and move the commit's date
    forward to CVS_REV's timestamp if that is later."""
    self.cvs_revs.append(cvs_rev)
    if self._max_date < cvs_rev.timestamp:
      self._max_date = cvs_rev.timestamp

  def _is_primary_commit(self):
    """Return true if this is a primary SVNCommit, that is, one with
    neither a symbolic name nor a motivating revnum; false otherwise."""
    if self.symbolic_name or self.motivating_revnum:
      return not 1
    return not 0

  def flush(self):
    """Log the creation of this commit and store its data through
    the PersistenceManager held by Ctx()."""
    Log().write(LOG_NORMAL, "Creating Subversion commit %d (%s)"
                % (self.revnum, self._description))
    manager = Ctx()._persistence_manager
    manager.set_cvs_revs(self.revnum, self.cvs_revs)

    if self.motivating_revnum is not None:
      Ctx()._persistence_manager.set_motivating_revnum(
          self.revnum, self.motivating_revnum)

    # If we're not a primary commit, then store our date and/or our
    # symbolic_name
    if not self._is_primary_commit():
      Ctx()._persistence_manager.set_name_and_date(
          self.revnum, self.symbolic_name, self._max_date)

  def __str__(self):
    """Return a human-readable description of this SVNCommit.  This
    description is not intended to be machine-parseable (although
    we're not going to stop you if you try!)"""
    pieces = [ "SVNCommit #: ", str(self.revnum), "\n" ]
    if self.symbolic_name:
      pieces.extend([ " symbolic name: ", self.symbolic_name, "\n" ])
    else:
      pieces.append(" NO symbolic name\n")
    pieces.extend([ " debug description: ", self._description, "\n" ])
    pieces.append(" cvs_revs:\n")
    for c_rev in self.cvs_revs:
      pieces.extend([ " ", c_rev.unique_key(), "\n" ])
    return "".join(pieces)

  def get_log_msg(self):
    """Return the actual log message for a primary commit, and the
    appropriate manufactured log message for a secondary commit."""
    if self.symbolic_name is not None:
      return self._log_msg_for_symbolic_name_commit()
    if self.motivating_revnum is not None:
      return self._log_msg_for_default_branch_commit()
    return self._log_msg

  def _log_msg_for_symbolic_name_commit(self):
    """Return a log message for a manufactured commit that fills
    self.symbolic_name.  If self.is_tag is true, word the message as
    though for a tag, else word it as though for a branch."""
    if self.is_tag:
      kind = 'tag'
    else:
      kind = 'branch'

    # In Python 2.2.3, we could use textwrap.fill().  Oh well :-).
    # Names of 13 characters or more go on a line of their own.
    if len(self.symbolic_name) >= 13:
      separator = '\n'
    else:
      separator = ' '

    return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
           % (kind, separator, self.symbolic_name)

  def _log_msg_for_default_branch_commit(self):
    """Return a log message for a manufactured commit that
    synchronizes a non-trunk default branch with trunk."""
    template = ('This commit was generated by cvs2svn to compensate for '
                'changes in r%d,\n'
                'which included commits to RCS files with non-trunk default '
                'branches.\n')
    return template % self.motivating_revnum
class CVSRevisionAggregator:
  """Group CVSRevisions into CVSCommits, each of which represents at
  least one SVNCommit."""

  def __init__(self):
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
                                   DB_OPEN_READ)

    # Pending CVSCommits, keyed by digest (or by a digest with '-'
    # characters appended once the commit is closed to new revisions).
    self.cvs_commits = {}

    # Used as a set of symbols that are waiting to be filled.
    self.pending_symbols = {}

    # A list of symbols for which we've already encountered the last
    # CVSRevision that is a source for that symbol.  That is, the
    # final fill for this symbol has been done, and we never need to
    # fill it again.
    self.done_symbols = [ ]

    # This variable holds the most recently created primary svn_commit
    # object.  CVSRevisionAggregator maintains this variable merely
    # for its date, so that it can set dates for the SVNCommits
    # created in self.attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = Database(temp(DEFAULT_BRANCHES_DB),
                                          DB_OPEN_READ)

  def process_revision(self, c_rev):
    """Add C_REV to the pending commit that matches its digest, after
    processing every pending commit that C_REV can no longer join."""
    # Each time we read a new line, we scan the commits we've
    # accumulated so far to see if any are ready for processing now.
    # The dict is changed inside the loop, so iterate over the list
    # that items() returns rather than over the dict itself.
    ready_queue = [ ]
    for digest_key, cvs_commit in self.cvs_commits.items():
      if cvs_commit.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
        ready_queue.append(cvs_commit)
        del self.cvs_commits[digest_key]
      elif cvs_commit.has_file(c_rev.fname):
        # If the inbound commit is on the same file as a pending commit,
        # close the pending commit to further changes.  Don't flush it
        # though, as there may be other pending commits dated before
        # this one.
        # ### ISSUE: the has_file() check above is not optimal.
        # It does fix the dataloss bug where revisions would get lost
        # if checked in too quickly, but it can also break apart the
        # commits.  The correct fix would require tracking the
        # dependencies between change sets and committing them in
        # proper order.
        #
        # Closing means moving the commit to a key that no digest can
        # match: keep appending '-' until the key is unused.
        parked_key = digest_key + '-'
        while self.cvs_commits.has_key(parked_key):
          parked_key = parked_key + '-'
        self.cvs_commits[parked_key] = cvs_commit
        del self.cvs_commits[digest_key]

    # Add this item into the set of still-available commits.
    if not self.cvs_commits.has_key(c_rev.digest):
      author, log = self.metadata_db[c_rev.digest]
      self.cvs_commits[c_rev.digest] = CVSCommit(c_rev.digest, author, log)
    self.cvs_commits[c_rev.digest].add_revision(c_rev)

    # If there are any elements in the ready_queue at this point, they
    # need to be processed, because this latest rev couldn't possibly
    # be part of any of them.  Sort them into time-order, then process
    # 'em.
    ready_queue.sort()

    # Make sure we attempt_to_commit_symbols for this c_rev, even if no
    # commits are ready.
    if not ready_queue:
      self.attempt_to_commit_symbols(ready_queue, c_rev)

    # Walk a copy, because each commit leaves the queue once it has
    # been processed; the symbol check sees only what is still queued.
    for cvs_commit in ready_queue[:]:
      primary = cvs_commit.process_revisions(self.done_symbols)
      self.latest_primary_svn_commit = primary
      ready_queue.remove(cvs_commit)
      self.attempt_to_commit_symbols(ready_queue, c_rev)

  def flush(self):
    """Commit anything left in self.cvs_commits.  Then, unless this
    is a trunk-only conversion, tell the SymbolingsLogger that all
    commits are done by closing it."""
    # Pair each commit with its key, commit first, so that sorting
    # orders the pairs by commit.
    leftovers = [(cvs_commit, key)
                 for key, cvs_commit in self.cvs_commits.items()]
    leftovers.sort()

    for cvs_commit, key in leftovers:
      self.latest_primary_svn_commit = \
          cvs_commit.process_revisions(self.done_symbols)
      del self.cvs_commits[key]
      self.attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
    """Generate 1 SVNCommit for each symbol in self.pending_symbols
    that doesn't have an opening CVSRevision in either QUEUED_COMMITS
    or self.cvs_commits.values().

    If C_REV is not None, then we first add to self.pending_symbols
    any symbols from C_REV that C_REV is the last CVSRevision for."""
    # If we're not doing a trunk-only conversion, get the symbolic
    # names that this c_rev is the last *source* CVSRevision for and
    # add them to those left over from previous passes through the
    # aggregator.
    if c_rev and not Ctx().trunk_only:
      for sym in self.last_revs_db.get(c_rev.unique_key(), []):
        self.pending_symbols[sym] = None

    # Collect the symbols that still have *source* CVSRevisions in
    # the pending commit queue (self.cvs_commits) or in
    # QUEUED_COMMITS.  Neither changes while we look, so the combined
    # list is built once.
    unfinished_commits = self.cvs_commits.values() + queued_commits
    open_symbols = {}
    for sym in self.pending_symbols.keys():
      for cvs_commit in unfinished_commits:
        if cvs_commit.opens_symbolic_name(sym):
          open_symbols[sym] = None
          break

    # Sort the pending symbols so that we will always process the
    # symbols in the same order, regardless of the order in which the
    # dict hashing algorithm hands them back to us.  We do this so
    # that our tests will get the same results on all platforms.
    ordered_symbols = self.pending_symbols.keys()
    ordered_symbols.sort()
    for sym in ordered_symbols:
      if open_symbols.has_key(sym): # sym is still open--don't close it.
        continue
      fill = SVNCommit("closing tag/branch '%s'" % sym)
      fill.set_symbolic_name(sym)
      fill.set_date(self.latest_primary_svn_commit.get_date())
      fill.flush()
      self.done_symbols.append(sym)
      del self.pending_symbols[sym]
class SymbolingsReader:
    """Provides an interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file
    and the SYMBOL_OFFSETS_DB.  Does the heavy lifting of finding and
    returning the correct opening and closing Subversion revision
    numbers for a given symbolic name."""

    def __init__(self):
        """Open the SYMBOL_OPENINGS_CLOSINGS_SORTED file for reading and
        load the offsets database into memory."""
        self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
        # The offsets database is really small and is read and updated
        # a fair bit, so keep a plain in-memory copy of it.
        offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
        self.offsets = {}
        for key in offsets_db.db.keys():
            self.offsets[key] = offsets_db[key]

    def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
        """Given SYMBOLIC_NAME and SVN_REVNUM, return a new
        SymbolicNameFillingGuide object.

        Lines are read from the symbolings file starting at the offset
        recorded for SYMBOLIC_NAME, and each one is registered with the
        guide until a line for another symbol, a line whose revision is
        greater than SVN_REVNUM, or an empty line / end of file is met.

        Note that if we encounter an opening rev in this fill, but the
        corresponding closing rev takes place later than SVN_REVNUM,
        the closing will not be passed to SymbolicNameFillingGuide in
        this fill (and will be discarded when encountered in a later
        fill).  This is perfectly fine, because we can still do a valid
        fill without the closing--we always try to fill what we can as
        soon as we can."""
        guide = SymbolicNameFillingGuide(symbolic_name)

        # It's possible to have a branch start with a file that was
        # added on a branch; such a symbol has no recorded offset and
        # gets an empty guide.
        if not self.offsets.has_key(symbolic_name):
            return guide

        # Position the read marker at the offset for symbolic_name.
        self.symbolings.seek(self.offsets[symbolic_name])

        while 1:
            # Remember where the line about to be read starts.
            fpos = self.symbolings.tell()
            line = self.symbolings.readline().rstrip()
            if not line:
                break
            name, revnum, kind, svn_path = line.split(" ", 3)
            revnum = int(revnum)
            if name != symbolic_name or revnum > svn_revnum:
                break
            guide.register(svn_path, revnum, kind)

        # If anything that was read got used, the next fill resumes at
        # the beginning of the last line read (the one that ended the
        # loop).
        if not guide.is_empty():
            self.offsets[symbolic_name] = fpos

        guide.make_node_tree()
        return guide
class SymbolicNameFillingGuide:
    """A SymbolicNameFillingGuide is essentially a node tree
    representing the source paths to be copied to fill the symbolic
    name self.name in the current SVNCommit.

    After calling self.register() on a series of openings and closings,
    call self.make_node_tree() to prepare self.node_tree for
    examination.  See the docstring for self.make_node_tree() for
    details on the structure of self.node_tree.

    By walking self.node_tree and calling self.get_best_revnum() on
    each node, the caller can determine what subversion revision number
    to copy the path corresponding to that node from.  self.node_tree
    should be treated as read-only.

    The caller can then descend to sub-nodes to see if their "best
    revnum" differs from their parents' and if it does, take
    appropriate actions to "patch up" the subtrees."""

    def __init__(self, symbolic_name):
        """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
        prepares it for receiving openings and closings."""
        self.name = symbolic_name

        # Keys under which a leaf stores its opening and closing
        # revisions.  They start with '/' so that they can never be
        # mistaken for a path component.
        self.opening_key = "/o"
        self.closing_key = "/c"

        # A dictionary of SVN_PATHS and SVN_REVNUMS whose format is:
        #
        # { svn_path : { self.opening_key : svn_revnum,
        #                self.closing_key : svn_revnum }
        #   ... }
        self.things = {}

        # The key for the root node of the node tree
        self.root_key = '0'
        # The dictionary that holds our node tree, seeded with the root
        # key.
        self.node_tree = {self.root_key: {}}

    def get_best_revnum(self, node, preferred_revnum):
        """Determine the best subversion revision number to use when
        copying the source tree beginning at NODE.

        Returns a tuple (REVNUM, SCORE): the chosen subversion revision
        number and the maximum score found.  PREFERRED_REVNUM is passed
        to self._best_rev and used to calculate the best revnum.

        If no revision can be found, an error naming this symbol is
        written to stderr and the program exits with status 1."""
        # Aggregate openings and closings from the rev tree
        openings = self._list_revnums_for_key(node, self.opening_key)
        closings = self._list_revnums_for_key(node, self.closing_key)

        # Score the lists
        scores = self._score_revisions(self._sum_revnum_counts(openings),
                                       self._sum_revnum_counts(closings))

        revnum, max_score = self._best_rev(scores, preferred_revnum)

        if revnum == SVN_INVALID_REVNUM:
            # Report the symbol being filled (self.name).
            sys.stderr.write(error_prefix + ": failed to find a revision "
                             + "to copy from when copying %s\n" % self.name)
            sys.exit(1)
        return revnum, max_score

    def _best_rev(self, scores, preferred_rev):
        """Return a tuple (REV, MAX_SCORE) for the revision with the
        highest score from SCORES, a list returned by
        _score_revisions().  When the maximum score is shared by
        multiple revisions, the oldest revision is selected, unless
        PREFERRED_REV is one of the possibilities, in which case, it is
        selected."""
        max_score = 0
        preferred_rev_score = -1
        rev = SVN_INVALID_REVNUM
        if preferred_rev is None:
            # Comparison order of different types is arbitrary.  Do not
            # expect None to compare less than int values below.
            # In Python 2.3 None compares with ints like negative
            # infinity.  In Python 2.0 None compares with ints like
            # positive infinity.
            preferred_rev = SVN_INVALID_REVNUM
        for revnum, count in scores:
            if count > max_score:
                max_score = count
                rev = revnum
            # A score applies from its revision up to the next listed
            # one, so the last entry at or before PREFERRED_REV gives
            # the score of PREFERRED_REV itself.
            if revnum <= preferred_rev:
                preferred_rev_score = count
        if preferred_rev_score == max_score:
            rev = preferred_rev
        return rev, max_score

    def _score_revisions(self, openings, closings):
        """Return a list of revisions and scores based on OPENINGS and
        CLOSINGS.  The returned list looks like:

           [(REV1 SCORE1), (REV2 SCORE2), ...]

        where REV2 > REV1.  OPENINGS and CLOSINGS are the values of
        self.opening_key and self.closing_key from some file or
        directory node, or else None.

        Each score indicates that copying the corresponding revision
        (or any following revision up to the next revision in the list)
        of the object in question would yield that many correct paths
        at or underneath the object.  There may be other paths
        underneath it which are not correct and would need to be
        deleted or recopied; those can only be detected by descending
        and examining their scores.

        If OPENINGS is false, return the empty list."""
        # First look for easy outs.
        if not openings:
            return []

        # Must be able to iterate over closings below.
        if closings is None:
            closings = []

        # Every opening raises the score from its revision onwards.
        scores = []
        opening_score_accum = 0
        for opening_rev, opening_score in openings:
            opening_score_accum = opening_score_accum + opening_score
            scores.append((opening_rev, opening_score_accum))

        # Every closing lowers the score from its revision onwards.
        # START skips the entries already known to precede the closings
        # still to come.
        start = 0
        for closing_rev, closing_score in closings:
            done_exact_rev = None
            insert_index = None
            insert_score = None
            for j in range(start, len(scores)):
                score_rev, score = scores[j]
                if score_rev >= closing_rev:
                    if not done_exact_rev:
                        if score_rev > closing_rev:
                            # The closing revision itself is not listed
                            # yet; remember where to insert it.
                            insert_index = j
                            insert_score = scores[j-1][1] - closing_score
                        done_exact_rev = 1
                    scores[j] = (score_rev, score - closing_score)
                else:
                    start = j + 1
            if not done_exact_rev:
                # The closing is younger than every listed revision.
                scores.append((closing_rev, scores[-1][1] - closing_score))
            if insert_index is not None:
                scores.insert(insert_index, (closing_rev, insert_score))
        return scores

    def _sum_revnum_counts(self, rev_list):
        """Takes an array of revisions (REV_LIST), for example:

          [21, 18, 6, 49, 39, 24, 24, 24, 24, 24, 24, 24]

        and adds up every occurrence of each revision and returns a
        sorted array of tuples containing (svn_revnum, count):

          [(6, 1), (18, 1), (21, 1), (24, 7), (39, 1), (49, 1)]
        """
        counts = {}
        for revnum in rev_list:
            counts[revnum] = counts.get(revnum, 0) + 1
        summed = list(counts.items())
        summed.sort()
        return summed

    def _list_revnums_for_key(self, node, revnum_type_key):
        """Scan self.node_tree and return a list of all the revision
        numbers (including duplicates) contained in REVNUM_TYPE_KEY
        values for all leaf nodes at and under NODE.

        REVNUM_TYPE_KEY should be either self.opening_key or
        self.closing_key."""
        contents = self.node_tree[node]

        # If the node has self.opening_key, it must be a leaf node--all
        # leaf nodes have at least an opening key (although they may
        # not have a closing key).  Fetch revnum and return.
        if (contents.has_key(self.opening_key) and
            contents.has_key(revnum_type_key)):
            return [contents[revnum_type_key]]

        revnums = []
        for key, child in contents.items():
            # Entries starting with '/' are the opening/closing flags,
            # not children.
            if key[0] == '/':
                continue
            revnums.extend(self._list_revnums_for_key(child,
                                                      revnum_type_key))
        return revnums

    def register(self, svn_path, svn_revnum, type):
        """Collects opening and closing revisions for this
        SymbolicNameFillingGuide.  SVN_PATH is the source path that
        needs to be copied into the symbolic name, and SVN_REVNUM is
        either the first svn revision number that we can copy from (our
        opening), or the last (not inclusive) svn revision number that
        we can copy from (our closing).  TYPE indicates whether this
        path is an opening or a closing.

        The opening for a given SVN_PATH must be passed before the
        closing for it to have any effect... any closing encountered
        before a corresponding opening will be discarded.

        It is not necessary to pass a corresponding closing for every
        opening."""
        # Always log an OPENING
        if type == OPENING:
            self.things[svn_path] = {self.opening_key: svn_revnum}
        # Only log a closing if we've already registered the opening
        # for that path.
        elif type == CLOSING and self.things.has_key(svn_path):
            # When we have a non-trunk default branch, we may have
            # multiple closings--only register the first closing we
            # encounter.
            if not self.things[svn_path].has_key(self.closing_key):
                self.things[svn_path][self.closing_key] = svn_revnum

    def make_node_tree(self):
        """Generates the SymbolicNameFillingGuide's node tree from
        self.things.  Each leaf node maps self.opening_key to the
        earliest subversion revision from which this node/path may be
        copied; and optionally maps self.closing_key to the subversion
        revision one higher than the last revision from which this
        node/path may be copied.  Intermediate nodes never contain
        opening or closing flags."""
        for svn_path, open_close in self.things.items():
            parent_key = self.root_key

            # Walk down the path, one node at a time, creating the
            # nodes that do not exist yet.
            for component in svn_path.split('/'):
                if not self.node_tree[parent_key].has_key(component):
                    child_key = gen_key()
                    self.node_tree[child_key] = {}
                    self.node_tree[parent_key][component] = child_key
                else:
                    child_key = self.node_tree[parent_key][component]
                parent_key = child_key

            # Having reached the leaf, attach the value
            self.node_tree[parent_key] = open_close

    def is_empty(self):
        """Return true if we haven't accumulated any openings or
        closings, false otherwise."""
        return not len(self.things)
class FillSource:
    """Representation of a fill source used by the symbol filler in
    SVNRepositoryMirror."""

    def __init__(self, prefix, key):
        """Create a fill source from PREFIX and KEY.  It starts out
        unscored: both its score and its revnum are None."""
        self.prefix = prefix
        self.key = key
        self.set_score(None, None)

    def set_score(self, score, revnum):
        """Record SCORE and REVNUM on this source."""
        self.score = score
        self.revnum = revnum

    def __cmp__(self, other):
        """Order FillSources by descending score, so that sorting a
        list of them puts the highest score first.

        Raises TypeError if either source has not been scored."""
        if self.score is None or other.score is None:
            raise TypeError('Tried to compare unscored FillSource')
        # The operands are swapped on purpose: a higher score sorts
        # earlier.
        return cmp(other.score, self.score)
2783 class SVNRepositoryMirror:
2784 """Mirror a Subversion Repository as it is constructed, one
2785 SVNCommit at a time. The mirror is skeletal; it does not contain
2786 file contents. The creation of a dumpfile or Subversion repository
2787 is handled by delegates. See self.add_delegate method for how to
2788 set delegates.
2790 The structure of the repository is kept in two databases and one
2791 hash. The revs_db database maps revisions to root node keys, and
2792 the nodes_db database maps node keys to nodes. A node is a hash
2793 from directory names to keys. Both the revs_db and the nodes_db are
2794 stored on disk and each access is expensive.
2796 The nodes_db database only has the keys for old revisions. The
2797 revision that is being constructed is kept in memory in the new_nodes
2798 hash which is cheap to access.
2800 You must invoke _start_commit between SVNCommits.
2802 *** WARNING *** All path arguments to methods in this class CANNOT
2803 have leading or trailing slashes.
class SVNRepositoryMirrorPathExistsError(Exception):
    """Exception raised if an attempt is made to add a path to the
    repository mirror and that path already exists in the youngest
    revision of the repository."""
class SVNRepositoryMirrorUnexpectedOperationError(Exception):
    """Exception raised if a CVSRevision is found to have an unexpected
    operation (OP) value."""
class SVNRepositoryMirrorInvalidFillOperationError(Exception):
    """Exception raised if an empty SymbolicNameFillingGuide is returned
    during a fill where the branch in question already exists."""
def __init__(self):
    """Set up the SVNRepositoryMirror and prepare it for SVNCommits.

    Two fresh on-disk databases are created and registered with
    Cleanup().  Unless this is a trunk-only conversion, the tags
    database and a SymbolingsReader are opened as well."""
    self.delegates = []

    # The counterpart of the 'revisions' table in a Subversion fs.
    self.revs_db = Database(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)

    # The counterpart of the 'nodes' table in a Subversion fs.  (No
    # 'representations' or 'strings' table is needed, because only
    # metadata is tracked, not file contents.)
    self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)

    # Start at revision 0 without a root node.  The root node will be
    # created by _open_writable_root_node.
    self.youngest = 0
    self.new_root_key = None
    self.new_nodes = {}

    if not Ctx().trunk_only:
        ###PERF IMPT: Suck this into memory.
        self.tags_db = TagsDatabase(DB_OPEN_READ)
        self.symbolings_reader = SymbolingsReader()
def _initialize_repository(self, date):
    """Initialize the repository by creating the directory for trunk
    and, unless this is a trunk-only conversion, the directories for
    branches and tags.  The initializing commit is dated DATE.

    This method should only be called after all delegates are added
    to the repository mirror."""
    # Use a 'fake' SVNCommit (revision 1) to take advantage of the
    # revprops magic therein.
    init_commit = SVNCommit("Initialization", 1)
    init_commit.set_date(date)
    init_commit.set_log_msg("New repository initialized by cvs2svn.")
    self._start_commit(init_commit)

    self._mkdir(Ctx().trunk_base)
    if not Ctx().trunk_only:
        self._mkdir(Ctx().branches_base)
        self._mkdir(Ctx().tags_base)
def _start_commit(self, svn_commit):
    """Start a new commit for SVN_COMMIT.

    A commit that is already in progress (self.youngest > 0) is ended
    first.  The in-memory state is then reset for revision
    SVN_COMMIT.revnum and the delegates are notified with
    'start_commit'."""
    commit_in_progress = self.youngest > 0
    if commit_in_progress:
        self._end_commit()

    self.youngest = svn_commit.revnum
    # No node has been made writable in the new revision yet.
    self.new_root_key = None
    self.new_nodes = {}

    self._invoke_delegates('start_commit', svn_commit)
def _end_commit(self):
    """Called at the end of each commit.  Records the root node of the
    youngest revision in self.revs_db and copies the newly created
    nodes to the on-disk nodes db."""
    revision = str(self.youngest)
    if self.new_root_key is not None:
        self.revs_db[revision] = self.new_root_key
        # Copy the new nodes to the nodes_db
        for node_key, node_contents in self.new_nodes.items():
            self.nodes_db[node_key] = node_contents
    else:
        # No changes were made in this revision, so the root node of
        # the new revision is the same as that of the previous one.
        self.revs_db[revision] = self.revs_db[str(self.youngest - 1)]
def _get_node(self, key):
    """Return the node contents for KEY.  The in-memory self.new_nodes
    is consulted first; if KEY is not there, the contents are fetched
    from self.nodes_db."""
    try:
        return self.new_nodes[key]
    except KeyError:
        return self.nodes_db[key]
def _open_readonly_node(self, path, revnum):
    """Look up PATH at revision REVNUM without making anything
    writable.

    Returns the key of the node at PATH if the path exists, else None.
    (Only the key is returned; use self._get_node() to obtain the
    node's contents.)"""
    # Get the root key
    if revnum != self.youngest:
        node_key = self.revs_db[str(revnum)]
    elif self.new_root_key is None:
        # Nothing has changed in the youngest revision so far, so its
        # root is still the root of the previous revision.
        node_key = self.revs_db[str(self.youngest - 1)]
    else:
        node_key = self.new_root_key

    # Descend one path component at a time.
    for component in path.split('/'):
        node_contents = self._get_node(node_key)
        try:
            node_key = node_contents[component]
        except KeyError:
            return None

    return node_key
def _open_writable_root_node(self):
    """Open a writable root node and return its key and contents.

    The current root node is returned immediately if it is already
    writable.  If not, a new one is created by copying the contents of
    the root node of the previous revision (or from scratch while
    self.youngest is below 2), and self.new_nodes is reset to hold
    only that new root."""
    if self.new_root_key is None:
        if self.youngest >= 2:
            previous_root_key = self.revs_db[str(self.youngest - 1)]
            root_contents = self.nodes_db[previous_root_key]
        else:
            root_contents = {}
        self.new_root_key = gen_key()
        self.new_nodes = {self.new_root_key: root_contents}
        return self.new_root_key, root_contents

    return self.new_root_key, self.new_nodes[self.new_root_key]
def _open_writable_node(self, svn_path, create):
    """Open a writable node for the path SVN_PATH, creating SVN_PATH
    and any missing directories if CREATE is True.

    Returns the key and the contents of the node at SVN_PATH, or
    (None, None) if some component of SVN_PATH does not exist and
    CREATE is false.  Delegates receive a 'mkdir' for every missing
    intermediate directory that gets created here, but not for the
    final component."""
    parent_key, parent_contents = self._open_writable_root_node()

    # Walk down the path, one node at a time.
    path_so_far = None
    components = svn_path.split('/')
    last_index = len(components) - 1
    for index in range(len(components)):
        component = components[index]
        this_key = this_contents = None
        path_so_far = _path_join(path_so_far, component)
        if parent_contents.has_key(component):
            # The component exists.
            this_key = parent_contents[component]
            if self.new_nodes.has_key(this_key):
                # It was already made writable in this revision.
                this_contents = self.new_nodes[this_key]
            else:
                # Fetch the node from the nodes_db, but store it under
                # a new key so that the old revision is left untouched.
                this_contents = self.nodes_db[this_key]
                this_key = gen_key()
                self.new_nodes[this_key] = this_contents
                parent_contents[component] = this_key
        elif create:
            # The component does not exist, so we create it.
            this_contents = {}
            this_key = gen_key()
            self.new_nodes[this_key] = this_contents
            parent_contents[component] = this_key
            if index < last_index:
                self._invoke_delegates('mkdir', path_so_far)
        else:
            # The component does not exist and we are not instructed
            # to create it, so we give up.
            return None, None

        parent_key = this_key
        parent_contents = this_contents

    return this_key, this_contents
def _path_exists(self, path):
    """Return True if PATH exists in self.youngest of the svn
    repository mirror, else False.

    PATH must not start with '/'."""
    node_key = self._open_readonly_node(path, self.youngest)
    return node_key is not None
def _fast_delete_path(self, parent_path, parent_contents, component):
    """Delete COMPONENT from the parent directory PARENT_PATH with the
    contents PARENT_CONTENTS, and notify the delegates with
    'delete_path'.  Do nothing if COMPONENT does not exist in
    PARENT_CONTENTS."""
    if not parent_contents.has_key(component):
        return
    del parent_contents[component]
    self._invoke_delegates('delete_path', _path_join(parent_path, component))
def _delete_path(self, svn_path, should_prune=False):
    """Delete SVN_PATH from the tree.  If SHOULD_PRUNE is true, then
    delete all ancestor directories that are made empty when SVN_PATH
    is deleted.  In other words, SHOULD_PRUNE is like the -P option to
    'cvs checkout'.

    NOTE: This function does *not* allow you to delete top-level
    entries (like /trunk, /branches, /tags), nor does it prune upwards
    beyond those entries."""
    slash = svn_path.rfind('/')
    parent_path, entry = svn_path[:slash], svn_path[slash+1:]

    parent_key, parent_contents = self._open_writable_node(parent_path, False)
    if parent_key is None:
        # The parent does not exist, so there is nothing to delete.
        return

    self._fast_delete_path(parent_path, parent_contents, entry)

    # The following recursion makes pruning an O(n^2) operation in the
    # worst case (where n is the depth of SVN_PATH), but the worst case
    # is probably rare, and the constant cost is pretty low.  Another
    # drawback is that we issue a delete for each path and not just
    # a single delete for the topmost directory pruned.
    parent_is_top_level = parent_path.find('/') == -1
    if should_prune and len(parent_contents) == 0 and not parent_is_top_level:
        self._delete_path(parent_path, True)
def _mkdir(self, path):
    """Create PATH in the repository mirror at the youngest revision
    and notify the delegates with 'mkdir'."""
    create_missing = True
    self._open_writable_node(path, create_missing)
    self._invoke_delegates('mkdir', path)
def _change_path(self, cvs_rev):
    """Register a change in self.youngest for the CVS_REV's svn_path
    in the repository mirror by notifying the delegates with
    'change_path'."""
    # The nodes need no update: the mirror is only concerned with the
    # presence or absence of paths, and a file content change does not
    # cause any path changes.
    commit_item = SVNCommitItem(cvs_rev, 0)
    self._invoke_delegates('change_path', commit_item)
def _add_path(self, cvs_rev):
    """Add the CVS_REV's svn_path to the repository mirror, creating
    any missing directories, and notify the delegates with
    'add_path'."""
    self._open_writable_node(cvs_rev.svn_path, True)
    commit_item = SVNCommitItem(cvs_rev, 1)
    self._invoke_delegates('add_path', commit_item)
def _copy_path(self, src_path, dest_path, src_revnum):
    """Copy SRC_PATH at subversion revision number SRC_REVNUM to
    DEST_PATH.  In the youngest revision of the repository, DEST_PATH's
    parent *must* exist, but DEST_PATH *cannot* exist.

    Return the node key and the contents of the new node at DEST_PATH
    as a dictionary.  Raises SVNRepositoryMirrorPathExistsError if
    DEST_PATH already exists in the mirror."""
    # Fetch the node that is being copied.
    src_key = self._open_readonly_node(src_path, src_revnum)
    src_contents = self._get_node(src_key)

    # Split dest_path into its parent path and its base name, and make
    # the parent writable.
    slash = dest_path.rindex('/')
    dest_parent, dest_basename = dest_path[:slash], dest_path[slash+1:]
    dest_parent_key, dest_parent_contents = \
        self._open_writable_node(dest_parent, False)

    if dest_parent_contents.has_key(dest_basename):
        msg = ("Attempt to add path '%s' to repository mirror " % dest_path
               + "when it already exists in the mirror.")
        raise self.SVNRepositoryMirrorPathExistsError(msg)

    dest_parent_contents[dest_basename] = src_key
    self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

    # Yes sir, src_key and src_contents are also the contents of the
    # destination.  This is a cheap copy, remember! :-)
    return src_key, src_contents
def _fill_symbolic_name(self, svn_commit):
    """Performs all copies necessary to create as much of the tag or
    branch SVN_COMMIT.symbolic_name as possible given the current
    revision of the repository mirror.

    The symbolic name is guaranteed to exist in the Subversion
    repository by the end of this call, even if there are no paths
    under it.

    Raises SVNRepositoryMirrorInvalidFillOperationError if the filling
    guide is empty although the branch already exists, and RuntimeError
    if the filling guide holds a top-level entry that is neither the
    trunk base nor the branches base."""
    symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
        svn_commit.symbolic_name, self.youngest)

    # Create the list of sources for the symbolic name.  All source
    # prefixes must be direct sources for the destination, i.e. we
    # must have 'trunk' and 'branches/my_branch' and not just
    # 'branches'.
    sources = []
    for entry, key in symbol_fill.node_tree[symbol_fill.root_key].items():
        if entry == Ctx().trunk_base:
            sources.append(FillSource(entry, key))
        elif entry == Ctx().branches_base:
            for entry2, key2 in symbol_fill.node_tree[key].items():
                sources.append(FillSource(entry + '/' + entry2, key2))
        else:
            # Should never happen.  Raise explicitly: a bare 'raise'
            # here has no exception of its own to re-raise.
            raise RuntimeError("Unexpected source entry '%s' while filling "
                               "symbolic name '%s'"
                               % (entry, svn_commit.symbolic_name))

    if self.tags_db.has_key(svn_commit.symbolic_name):
        dest_prefix = _path_join(Ctx().tags_base, svn_commit.symbolic_name)
    else:
        dest_prefix = _path_join(Ctx().branches_base,
                                 svn_commit.symbolic_name)

    if sources:
        dest_key = self._open_writable_node(dest_prefix, False)[0]
        self._fill(symbol_fill, dest_prefix, dest_key, sources)
    else:
        # We can only get here for a branch whose first commit is an
        # add (as opposed to a copy).
        dest_path = Ctx().branches_base + '/' + symbol_fill.name
        if not self._path_exists(dest_path):
            # If our symbol_fill was empty, that means that our first
            # commit on the branch was to a file added on the branch,
            # and that this is our first fill of that branch.
            #
            # This case is covered by test 16.
            #
            # ...we create the branch by copying trunk from our
            # current revision number minus 1
            source_path = Ctx().trunk_base
            entries = self._copy_path(source_path, dest_path,
                                      svn_commit.revnum - 1)[1]
            # Now since we've just copied trunk to a branch that's
            # *supposed* to be empty, we delete any entries in the
            # copied directory.
            for entry in entries.keys():
                del_path = dest_path + '/' + entry
                # Delete but don't prune.
                self._delete_path(del_path)
        else:
            msg = "Error filling branch '" + symbol_fill.name + "'.\n"
            msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
            msg = msg + "attempted to create a branch that already exists."
            raise self.SVNRepositoryMirrorInvalidFillOperationError(msg)
def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
          path = None, parent_source_prefix = None,
          preferred_revnum = None, prune_ok = None):
    """Fill the tag or branch at DEST_PREFIX + PATH with items from
    SOURCES, and recurse into the child items.

    DEST_PREFIX is the prefix of the destination directory, e.g.
    '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
    FillSource instances that are candidates to be copied to the
    destination.  DEST_KEY is the key in self.nodes_db to the
    destination, or None if the destination does not yet exist.

    PATH is the path relative to DEST_PREFIX.  If PATH is None, we
    are at the top level, e.g. '/tags/my_tag'.

    PARENT_SOURCE_PREFIX is the source prefix that was used to copy
    the parent directory, and PREFERRED_REVNUM is an int which is the
    source revision number that the caller (who may have copied KEY's
    parent) used to perform its copy.  If PREFERRED_REVNUM is None,
    then no revision is preferable to any other (which probably means
    that no copies have happened yet).

    PRUNE_OK means that a copy has been made in this recursion, and
    it's safe to prune directories that are not in
    SYMBOL_FILL.node_tree, provided that said directory has a source
    prefix of one of the PARENT_SOURCE_PREFIX.

    PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
    should only be passed in by recursive calls."""
    # Calculate scores and revnums for all sources
    for candidate in sources:
        best_revnum, best_score = symbol_fill.get_best_revnum(
            candidate.key, preferred_revnum)
        candidate.set_score(best_score, best_revnum)

    # Sort the sources in descending score order so that the eventual
    # copy is made from the source with the highest score.
    sources.sort()
    copy_source = sources[0]

    src_path = _path_join(copy_source.prefix, path)
    dest_path = _path_join(dest_prefix, path)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    do_copy = 0
    if dest_key is None:
        do_copy = 1
    elif prune_ok and (parent_source_prefix != copy_source.prefix or
                       copy_source.revnum != preferred_revnum):
        # We are about to replace the destination, so we need to
        # remove it before we perform the copy.
        self._delete_path(dest_path)
        do_copy = 1

    if do_copy:
        dest_key, dest_entries = self._copy_path(src_path, dest_path,
                                                 copy_source.revnum)
        prune_ok = 1
    else:
        dest_entries = self._get_node(dest_key)

    # Create the SRC_ENTRIES hash from SOURCES.  The keys are path
    # elements and the values are lists of FillSource instances where
    # this path element exists.
    src_entries = {}
    for candidate in sources:
        for entry, key in symbol_fill.node_tree[candidate.key].items():
            if entry[0] == '/': # Skip flags
                continue
            if not src_entries.has_key(entry):
                src_entries[entry] = []
            src_entries[entry].append(FillSource(candidate.prefix, key))

    if prune_ok:
        # Delete the entries in DEST_ENTRIES that are not in
        # SRC_ENTRIES.
        delete_list = []
        for entry in dest_entries.keys():
            if not src_entries.has_key(entry):
                delete_list.append(entry)
        if delete_list:
            # The destination must be writable before entries can be
            # removed from it.
            if not self.new_nodes.has_key(dest_key):
                dest_key, dest_entries = self._open_writable_node(dest_path,
                                                                  True)
            # Sort the delete list to get "diffable" dumpfiles.
            delete_list.sort()
            for entry in delete_list:
                self._fast_delete_path(dest_path, dest_entries, entry)

    # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
    child_names = src_entries.keys()
    child_names.sort()
    for child_name in child_names:
        if dest_entries.has_key(child_name):
            next_dest_key = dest_entries[child_name]
        else:
            next_dest_key = None
        self._fill(symbol_fill, dest_prefix, next_dest_key,
                   src_entries[child_name], _path_join(path, child_name),
                   copy_source.prefix, sources[0].revnum, prune_ok)
def _synchronize_default_branch(self, svn_commit):
    """Propagate any changes that happened on a non-trunk default
    branch to the trunk of the repository.  See
    CVSCommit._post_commit() for details on why this is necessary.

    Raises SVNRepositoryMirrorUnexpectedOperationError for a
    CVSRevision whose operation is not an add, a change or a delete."""
    for cvs_rev in svn_commit.cvs_revs:
        trunk_path = cvs_rev.svn_trunk_path
        if cvs_rev.op == OP_DELETE:
            # delete trunk path
            self._delete_path(trunk_path)
        elif cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
            # Replace whatever is on trunk with a copy from the branch.
            if self._path_exists(trunk_path):
                self._delete_path(trunk_path)
            self._copy_path(cvs_rev.svn_path, trunk_path,
                            svn_commit.motivating_revnum)
        else:
            msg = ("Unknown CVSRevision operation '%s' in default branch sync."
                   % cvs_rev.op)
            raise self.SVNRepositoryMirrorUnexpectedOperationError(msg)
def commit(self, svn_commit):
    """Apply SVN_COMMIT (an SVNCommit) to the mirrored repository.

    The repository is initialized when SVN_COMMIT is revision 2, then
    the commit is opened with self._start_commit().  After that exactly
    one of three things happens:

      * SVN_COMMIT.symbolic_name is set: the symbolic name is filled.
      * SVN_COMMIT.motivating_revnum is set: the default branch is
        synchronized to trunk.
      * otherwise: each CVSRevision in SVN_COMMIT.cvs_revs is added,
        changed or deleted."""
    if svn_commit.revnum == 2:
        self._initialize_repository(svn_commit.get_date())

    self._start_commit(svn_commit)

    if svn_commit.symbolic_name:
        Log().write(LOG_VERBOSE, "Filling symbolic name:",
                    svn_commit.symbolic_name)
        self._fill_symbolic_name(svn_commit)
        return

    if svn_commit.motivating_revnum:
        Log().write(LOG_VERBOSE,
                    "Synchronizing default_branch motivated by %d"
                    % svn_commit.motivating_revnum)
        self._synchronize_default_branch(svn_commit)
        return

    # From here on we are committing actual CVSRevisions.
    rev_count = len(svn_commit.cvs_revs)
    plural = ""
    if rev_count > 1:
        plural = "s"
    Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
                % (rev_count, plural))

    for cvs_rev in svn_commit.cvs_revs:
        # See comment in CVSCommit._commit() for what this is all
        # about.  self._path_exists() is the expensive test, so it is
        # placed last and only runs when both cheap tests hold.
        is_redundant = (cvs_rev.deltatext_code == DELTATEXT_EMPTY
                        and cvs_rev.rev == "1.1.1.1"
                        and self._path_exists(cvs_rev.svn_path))

        if not is_redundant:
            if cvs_rev.op == OP_ADD:
                self._add_path(cvs_rev)
            elif cvs_rev.op == OP_CHANGE:
                # Fix for Issue #74:
                #
                # FOO is imported on a non-trunk vendor branch, so it
                # exists in r1.1 and r1.1.1.1.  Later FOO is deleted
                # on the default branch (r1.1.1.2); cvs2svn decides
                # the delete must also happen on trunk and removes FOO
                # there.  Then r1.2 arrives with OP_CHANGE (r1.1 is
                # not 'dead', so it is assumed to be a change), but
                # the trunk file is gone and svnadmin cannot change a
                # file that does not exist.  So: a change to a missing
                # path becomes an add; otherwise business as usual.
                if self._path_exists(cvs_rev.svn_path):
                    self._change_path(cvs_rev)
                else:
                    self._add_path(cvs_rev)

        if cvs_rev.op == OP_DELETE:
            self._delete_path(cvs_rev.svn_path, Ctx().prune)
def cleanup(self):
    """Callback for the Cleanup.register in self.__init__.

    Drops this object's references to its revisions and nodes
    databases by resetting both attributes to None."""
    self.revs_db = self.nodes_db = None
def add_delegate(self, delegate):
    """Register DELEGATE at the end of self.delegates.

    Whenever SVNRepositoryMirror performs a repository action method,
    it calls the corresponding repository action method of every
    registered delegate.  Delegates are called in the order in which
    they were added.  See SVNRepositoryMirrorDelegate for more
    information."""
    registered = self.delegates
    registered.append(delegate)
def _invoke_delegates(self, method, *args):
    """Call the method named METHOD, with the arguments in ARGS, on
    each of our delegates in turn.

    Delegates are visited in the order in which they were added."""
    for delegate in self.delegates:
        handler = getattr(delegate, method)
        handler(*args)
def finish(self):
    """Wind the mirror down once everything has been committed.

    Three steps, in this order: end the current commit, call the
    'finish' method of every delegate, then run self.cleanup()."""
    # Close out the commit that is still open.
    self._end_commit()
    # Let each delegate do its own end-of-run work.
    self._invoke_delegates('finish')
    # Finally let go of our own resources.
    self.cleanup()
class SVNCommitItem:
    """A wrapper class for CVSRevision objects upon which
    Subversion-related data (such as properties) may be hung."""

    def __init__(self, c_rev, make_svn_props):
        """Wrap C_REV and work out its Subversion properties.

        self.svn_props always starts out empty.  When Ctx().cvs_revnums
        is true it receives 'cvs2svn:cvs-rev' and the 'svn:' properties
        are filled in regardless of MAKE_SVN_PROPS; otherwise the
        'svn:' properties are only filled in when MAKE_SVN_PROPS is
        true.  self.needs_eol_filter and self.has_keywords are always
        computed, whether or not the properties themselves are
        stored."""
        self.c_rev = c_rev
        self.set_cvs_revnum_properties = Ctx().cvs_revnums
        self.eol_from_mime_type = Ctx().eol_from_mime_type
        self.no_default_eol = Ctx().no_default_eol
        self.keywords_off = Ctx().keywords_off
        self.mime_mapper = Ctx().mime_mapper

        # We begin with only a "CVS revision" property.
        self.svn_props = { }
        if self.set_cvs_revnum_properties:
            self.svn_props['cvs2svn:cvs-rev'] = c_rev.rev
            make_svn_props = True

        # Mime-type and eol-style are intertwingled; follow the
        # conditionals carefully.  See also issue #39.
        mime_type = None
        if self.mime_mapper:
            mime_type = self.mime_mapper.get_type_from_filename(
                c_rev.cvs_path)

        eol_style = None
        if c_rev.mode == 'b':
            # file is kb, and no other mimetype specified
            if mime_type is None:
                mime_type = 'application/octet-stream'
        elif not self.no_default_eol:
            eol_style = 'native'
        elif (mime_type and self.eol_from_mime_type
              and mime_type.startswith("text/")):
            eol_style = 'native'

        # Decide on svn:keywords, if appropriate.  See issue #2.
        keywords = None
        if not self.keywords_off and c_rev.mode in (None, 'kv', 'kvl'):
            keywords = SVN_KEYWORDS_VALUE

        # Remember if we need to filter the EOLs.  We can't consult
        # self.svn_props for this because they are only set on the
        # first revision, whereas every revision needs filtering.
        self.needs_eol_filter = (eol_style == 'native')

        # Remember if this file has svn:keywords set.
        self.has_keywords = (keywords is not None)

        # Only fill in the Subversion ('svn:') properties when asked.
        if not make_svn_props:
            return

        # Tack on the executableness, if any.
        if c_rev.file_executable:
            self.svn_props['svn:executable'] = '*'

        for prop_name, prop_value in (('svn:keywords', keywords),
                                      ('svn:mime-type', mime_type),
                                      ('svn:eol-style', eol_style)):
            if prop_value:
                self.svn_props[prop_name] = prop_value
class SVNRepositoryMirrorDelegate:
    """Abstract superclass for any delegate to SVNRepositoryMirror.
    Subclasses must implement all of the methods below.

    Each method stands for one Subversion operation, and a subclass
    carries that operation out in its own way.  Taking add_path as the
    example: the DumpfileDelegate would write out a "Node-add:" command
    to a Subversion dumpfile, the StdoutDelegate would merely print
    that the path is being added to the repository, and the
    RepositoryDelegate would actually cause the path to be added to the
    Subversion repository that it is creating.
    """

    def start_commit(self, svn_commit):
        """Do whatever is needed to begin SVNCommit SVN_COMMIT; the
        details are up to the subclass."""
        raise NotImplementedError()

    def mkdir(self, path):
        """PATH is a string; the details are up to the subclass."""
        raise NotImplementedError()

    def add_path(self, s_item):
        """S_ITEM is an SVNCommitItem; the details are up to the
        subclass."""
        raise NotImplementedError()

    def change_path(self, s_item):
        """S_ITEM is an SVNCommitItem; the details are up to the
        subclass."""
        raise NotImplementedError()

    def delete_path(self, path):
        """PATH is a string; the details are up to the subclass."""
        raise NotImplementedError()

    def copy_path(self, src_path, dest_path, src_revnum):
        """SRC_PATH and DEST_PATH are both strings, and SRC_REVNUM is
        a subversion revision number (int); the details are up to the
        subclass."""
        raise NotImplementedError()

    def finish(self):
        """Do any cleanup that is necessary once all revisions have
        been committed."""
        raise NotImplementedError()
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
    """Create a Subversion dumpfile."""

    def __init__(self, dumpfile_path=None):
        """Return a new DumpfileDelegate instance, attached to a dumpfile
        DUMPFILE_PATH (Ctx().dumpfile, if None), using Ctx().encoding.

        The dumpfile is opened for binary writing and the dumpfile
        header is written to it immediately.

        The node properties written by this delegate are the ones
        carried by each SVNCommitItem:

        If Ctx().cvs_revnums is true, then the 'cvs2svn:cvs-rev'
        property is set on files, when they are changed due to a
        corresponding CVS revision.

        If Ctx().mime_mapper is not None, then it is a MimeMapper
        instance, used to determine whether or not to set the
        'svn:mime-type' property on files.  But even if
        Ctx().mime_mapper is None, files marked with the CVS 'kb' flag
        will receive a mime type of "application/octet-stream".

        Unless Ctx().no_default_eol is true, set 'svn:eol-style' to
        'native' for files not marked with the CVS 'kb' flag, except as
        superseded by Ctx().eol_from_mime_type (see below).

        If Ctx().eol_from_mime_type is not None, then set
        'svn:eol-style' to 'native' for all files to which
        Ctx().mime_mapper assigns a mime type beginning with "text/",
        and don't set 'svn:eol-style' for files assigned a type not
        beginning with "text/".
        """
        if dumpfile_path:
            self.dumpfile_path = dumpfile_path
        else:
            self.dumpfile_path = Ctx().dumpfile
        self.path_encoding = Ctx().encoding

        self.dumpfile = open(self.dumpfile_path, 'wb')
        self._write_dumpfile_header(self.dumpfile)

    def _write_dumpfile_header(self, dumpfile):
        """Write the standard dumpfile header to the file-like object
        DUMPFILE."""
        # Since the CVS repository doesn't have a UUID, and the
        # Subversion repository will be created with one anyway, we
        # don't specify a UUID in the dumpfile.
        dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    def _utf8_path(self, path):
        """Return a copy of PATH encoded in UTF-8.  PATH is assumed to
        be encoded in self.path_encoding.

        If PATH cannot be decoded, report the problem on stderr and
        exit with status 1."""
        try:
            # Log messages can be converted with the 'replace'
            # strategy, but we can't afford any lossiness here.
            unicode_path = unicode(path, self.path_encoding, 'strict')
            return unicode_path.encode('utf-8')
        except UnicodeError:
            # This is fatal, so it belongs on stderr with the usual
            # error prefix, like the other fatal errors in this file.
            sys.stderr.write(
                "%s: Unable to convert a path '%s' to internal encoding.\n"
                "Consider rerunning with (for example) '--encoding=latin1'\n"
                % (error_prefix, path))
            sys.exit(1)

    def start_commit(self, svn_commit):
        """Emit the start of SVN_COMMIT (an SVNCommit)."""

        self.revision = svn_commit.revnum

        # The start of a new commit typically looks like this:
        #
        #   Revision-number: 1
        #   Prop-content-length: 129
        #   Content-length: 129
        #
        #   K 7
        #   svn:log
        #   V 27
        #   Log message for revision 1.
        #   K 10
        #   svn:author
        #   V 7
        #   jrandom
        #   K 8
        #   svn:date
        #   V 27
        #   2003-04-22T22:57:58.132837Z
        #   PROPS-END
        #
        # Notice that the length headers count everything -- not just
        # the length of the data but also the lengths of the lengths,
        # including the 'K ' or 'V ' prefixes.
        #
        # The reason there are both Prop-content-length and
        # Content-length is that the former includes just props, while
        # the latter includes everything.  That's the generic header
        # form for any entity in a dumpfile.  But since revisions only
        # have props, the two lengths are always the same for revisions.

        # Render every property that has a value, in name order, and
        # add up the length of the props section as we go.
        props = svn_commit.get_revprops()
        prop_names = props.keys()
        prop_names.sort()
        terminator = 'PROPS-END\n'
        rendered_props = []
        total_len = len(terminator)
        for propname in prop_names:
            value = props[propname]
            if value is None:
                continue
            rendered = ('K %d\n'
                        '%s\n'
                        'V %d\n'
                        '%s\n' % (len(propname), propname,
                                  len(value), value))
            rendered_props.append(rendered)
            total_len = total_len + len(rendered)

        # Print the revision header and props
        self.dumpfile.write('Revision-number: %d\n'
                            'Prop-content-length: %d\n'
                            'Content-length: %d\n'
                            '\n'
                            % (self.revision, total_len, total_len))
        for rendered in rendered_props:
            self.dumpfile.write(rendered)
        self.dumpfile.write(terminator)
        self.dumpfile.write('\n')

    def mkdir(self, path):
        """Emit the creation of directory PATH."""
        # NOTE(review): the header advertises a Content-length of 10
        # (the size of a bare 'PROPS-END\n' block), but neither a
        # Prop-content-length header nor the block itself is written
        # here.  Confirm that the dumpfile loader tolerates this.
        self.dumpfile.write("Node-path: %s\n"
                            "Node-kind: dir\n"
                            "Node-action: add\n"
                            "Content-length: 10\n"
                            "\n"
                            "\n" % self._utf8_path(path))

    def _add_or_change_path(self, s_item, op):
        """Emit the addition or change corresponding to S_ITEM.
        OP is either the constant OP_ADD or OP_CHANGE.

        Any other OP is reported on stderr and the program exits with
        status 1.  The program also exits if the checkout command that
        supplies the file contents fails."""

        # Validation stuffs
        if op == OP_ADD:
            action = 'add'
        elif op == OP_CHANGE:
            action = 'change'
        else:
            sys.stderr.write(
                "%s: _add_or_change_path() called with bad op ('%s')\n"
                % (error_prefix, op))
            sys.exit(1)

        # Convenience variables
        c_rev = s_item.c_rev
        svn_props = s_item.svn_props

        # The property handling here takes advantage of an undocumented
        # but IMHO consistent feature of the Subversion
        # dumpfile-loading code.  When a node's properties aren't
        # mentioned (that is, the "Prop-content-length:" header is
        # absent, no properties are listed at all, and there is no
        # "PROPS-END\n" line) then no change is made to the node's
        # properties.
        #
        # This is consistent with the way dumpfiles behave w.r.t. text
        # content changes, so I'm comfortable relying on it.  If you
        # commit a change to *just* the properties of some node that
        # already has text contents from a previous revision, then in
        # the dumpfile output for the prop change, no
        # "Text-content-length:" nor "Text-content-md5:" header will be
        # present, and the text of the file will not be given.  But
        # this does not cause the file's text to be erased!  It simply
        # remains unchanged.
        #
        # This works out great for cvs2svn, due to lucky coincidences:
        #
        # For files, the only properties we ever set are set in the
        # first revision; all other revisions (including on branches)
        # inherit from that.  After the first revision, we never change
        # file properties, therefore, there is no need to remember the
        # full set of properties on a given file once we've set it.
        #
        # For directories, the only property we set is "svn:ignore",
        # and while we may change it after the first revision, we
        # always do so based on the contents of a ".cvsignore" file --
        # in other words, CVS is doing the remembering for us, so we
        # still don't have to preserve the previous value of the
        # property ourselves.

        # Calculate the (sorted-by-name) property string and length,
        # if any.
        prop_names = svn_props.keys()
        prop_names.sort()
        prop_chunks = []
        for pname in prop_names:
            pval = svn_props[pname]
            prop_chunks.append('K %d\n%s\nV %d\n%s\n'
                               % (len(pname), pname, len(pval), pval))
        prop_contents = ''.join(prop_chunks)

        props_len = 0
        props_header = ''
        if prop_contents:
            prop_contents = prop_contents + 'PROPS-END\n'
            props_len = len(prop_contents)
            props_header = 'Prop-content-length: %d\n' % props_len

        # treat .cvsignore as a directory property
        dir_path, basename = os.path.split(c_rev.svn_path)
        if basename == ".cvsignore":
            ignore_vals = generate_ignores(c_rev)
            ignore_contents = '\n'.join(ignore_vals)
            ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n'
                               % (len(ignore_contents), ignore_contents))
            ignore_contents = ignore_contents + 'PROPS-END\n'
            ignore_len = len(ignore_contents)

            # write headers, then props
            self.dumpfile.write('Node-path: %s\n'
                                'Node-kind: dir\n'
                                'Node-action: change\n'
                                'Prop-content-length: %d\n'
                                'Content-length: %d\n'
                                '\n'
                                '%s'
                                % (self._utf8_path(dir_path), ignore_len,
                                   ignore_len, ignore_contents))

        # If the file has keywords, we must use -kk to prevent CVS/RCS
        # from expanding the keywords because they must be unexpanded
        # in the repository, or Subversion will get confused.
        if s_item.has_keywords:
            pipe_cmd, pipe = get_co_pipe(c_rev, '-kk')
        else:
            pipe_cmd, pipe = get_co_pipe(c_rev)

        self.dumpfile.write('Node-path: %s\n'
                            'Node-kind: file\n'
                            'Node-action: %s\n'
                            '%s'  # no property header if no props
                            'Text-content-length: '
                            % (self._utf8_path(c_rev.svn_path),
                               action, props_header))

        # The length and checksum are only known once the contents
        # have been streamed, so remember where the placeholders start
        # and patch the real values in afterwards.
        pos = self.dumpfile.tell()

        self.dumpfile.write('0000000000000000\n'
                            'Text-content-md5: '
                            '00000000000000000000000000000000\n'
                            'Content-length: 0000000000000000\n'
                            '\n')

        if prop_contents:
            self.dumpfile.write(prop_contents)

        # Insert a filter to convert all EOLs to LFs if necessary
        if s_item.needs_eol_filter:
            data_reader = LF_EOL_Filter(pipe.fromchild)
        else:
            data_reader = pipe.fromchild

        # Insert the rev contents, calculating length and checksum as
        # we go.
        checksum = md5.new()
        length = 0
        while True:
            buf = data_reader.read(PIPE_READ_SIZE)
            if buf == '':
                break
            checksum.update(buf)
            length = length + len(buf)
            self.dumpfile.write(buf)

        pipe.fromchild.close()
        error_output = pipe.childerr.read()
        exit_status = pipe.wait()
        if exit_status:
            sys.exit("%s: The command '%s' failed with exit status: %s\n"
                     "and the following output:\n"
                     "%s" % (error_prefix, pipe_cmd, exit_status,
                             error_output))

        # Go back to patch up the length and checksum headers:
        self.dumpfile.seek(pos, 0)
        # We left 16 zeros for the text length; replace them with the
        # real length, padded on the left with spaces:
        self.dumpfile.write('%16d' % length)
        # 16... + 1 newline + len('Text-content-md5: ') == 35
        self.dumpfile.seek(pos + 35, 0)
        self.dumpfile.write(checksum.hexdigest())
        # 35... + 32 bytes of checksum + 1 newline
        # + len('Content-length: ') == 84
        self.dumpfile.seek(pos + 84, 0)
        # The content length is the length of property data, text
        # data, and any metadata around/inside around them.
        self.dumpfile.write('%16d' % (length + props_len))
        # Jump back to the end of the stream
        self.dumpfile.seek(0, 2)

        # This record is done (write two newlines -- one to terminate
        # contents that weren't themselves newline-terminated, one to
        # provide a blank line for readability).
        self.dumpfile.write('\n\n')

    def add_path(self, s_item):
        """Emit the addition corresponding to S_ITEM, an
        SVNCommitItem."""
        self._add_or_change_path(s_item, OP_ADD)

    def change_path(self, s_item):
        """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
        self._add_or_change_path(s_item, OP_CHANGE)

    def delete_path(self, path):
        """Emit the deletion of PATH."""
        self.dumpfile.write('Node-path: %s\n'
                            'Node-action: delete\n'
                            '\n' % self._utf8_path(path))

    def copy_path(self, src_path, dest_path, src_revnum):
        """Emit the copying of SRC_PATH at SRC_REVNUM to DEST_PATH."""
        # We don't need to include "Node-kind:" for copies; the loader
        # ignores it anyway and just uses the source kind instead.
        self.dumpfile.write('Node-path: %s\n'
                            'Node-action: add\n'
                            'Node-copyfrom-rev: %d\n'
                            'Node-copyfrom-path: /%s\n'
                            '\n'
                            % (self._utf8_path(dest_path),
                               src_revnum,
                               self._utf8_path(src_path)))

    def finish(self):
        """Perform any cleanup necessary after all revisions have been
        committed.  Here that means closing the dumpfile."""
        self.dumpfile.close()
class RepositoryDelegate(DumpfileDelegate):
    """Creates a new Subversion Repository.  DumpfileDelegate does all
    of the heavy lifting.

    Each commit is written to a temporary dumpfile by the inherited
    DumpfileDelegate methods.  The dumpfile is then fed to a long-lived
    'svnadmin load' process, either when the next commit starts or in
    finish()."""

    def __init__(self):
        """Create the target repository (unless Ctx().existing_svnrepos
        is set), open the temporary dumpfile, and start the
        'svnadmin load' process that will receive the commits."""
        self.svnadmin = Ctx().svnadmin
        self.target = Ctx().target
        if not Ctx().existing_svnrepos:
            Log().write(LOG_NORMAL,
                        "Creating new repository '%s'" % (self.target))
            fs_type = Ctx().fs_type
            if fs_type and fs_type != 'bdb':
                # User specified something other than bdb.
                create_options = "--fs-type=%s" % fs_type
            elif fs_type:
                # User explicitly specified bdb.
                #
                # Since this is a BDB repository, pass
                # --bdb-txn-nosync, because it gives us a 4-5x speed
                # boost (if cvs2svn is creating the repository, cvs2svn
                # should be the only program accessing the svn
                # repository (until cvs is done, at least)).  But we'll
                # turn no-sync off in self.finish(), unless instructed
                # otherwise.
                create_options = "--fs-type=bdb --bdb-txn-nosync"
            else:
                # User didn't say what kind repository (bdb, fsfs,
                # etc).  We still pass --bdb-txn-nosync.  It's a no-op
                # if the default repository type doesn't support it,
                # but we definitely want it if BDB is the default.
                create_options = "--bdb-txn-nosync"
            run_command('%s create %s "%s"'
                        % (self.svnadmin, create_options, self.target))

        # Since the output of this run is a repository, not a dumpfile,
        # the temporary dumpfiles we create should go in the tmpdir.
        DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

        # This is 1 if a commit is in progress, otherwise None.
        self._commit_in_progress = None

        # DumpfileDelegate.__init__ left a write-only handle open with
        # the dumpfile header buffered in it.  Close it before
        # reopening the file for reading and writing; otherwise the
        # buffered header is flushed whenever the stale handle happens
        # to be collected.
        self.dumpfile.close()
        self.dumpfile = open(self.dumpfile_path, 'w+b')
        self.loader_pipe = Popen3('%s load -q "%s"'
                                  % (self.svnadmin, self.target), True)
        self.loader_pipe.fromchild.close()
        try:
            self._write_dumpfile_header(self.loader_pipe.tochild)
        except IOError:
            self._report_loader_failure()

    def _report_loader_failure(self):
        """Relay the stderr output of 'svnadmin load' after a failed
        write to it, then exit with status 1."""
        sys.stderr.write("%s: svnadmin failed with the following output "
                         "while loading the dumpfile:\n" % (error_prefix))
        sys.stderr.write(self.loader_pipe.childerr.read())
        sys.exit(1)

    def _feed_pipe(self):
        """Feed the revision stored in the dumpfile to the svnadmin
        load pipe."""
        self.dumpfile.seek(0)
        while 1:
            data = self.dumpfile.read(128*1024)  # Chunk size is arbitrary
            if not len(data):
                break
            try:
                self.loader_pipe.tochild.write(data)
            except IOError:
                self._report_loader_failure()

    def start_commit(self, svn_commit):
        """Start a new commit.  If a commit is already in progress,
        load the dumpfile into the svn repository and empty it before
        the new commit is written into it."""
        if self._commit_in_progress:
            self._feed_pipe()
            self.dumpfile.seek(0)
            self.dumpfile.truncate()
        DumpfileDelegate.start_commit(self, svn_commit)
        self._commit_in_progress = 1

    def finish(self):
        """Loads the last commit into the repository, waits for
        'svnadmin load' to finish, and removes the temporary dumpfile.

        Exits with an error message if 'svnadmin load' reports a
        non-zero exit status."""
        self._feed_pipe()
        self.dumpfile.close()
        self.loader_pipe.tochild.close()
        error_output = self.loader_pipe.childerr.read()
        exit_status = self.loader_pipe.wait()
        if exit_status:
            sys.exit('%s: svnadmin load failed with exit status: %s\n'
                     'and the following output:\n'
                     '%s' % (error_prefix, exit_status, error_output))
        os.remove(self.dumpfile_path)

        # If this is a BDB repository, and we created the repository,
        # and --bdb-no-sync wasn't passed, then comment out the
        # DB_TXN_NOSYNC line in the DB_CONFIG file, because txn syncing
        # should be on by default in BDB repositories.
        #
        # We determine if this is a BDB repository by looking for the
        # DB_CONFIG file, which doesn't exist in FSFS, rather than by
        # checking Ctx().fs_type.  That way this code will Do The
        # Right Thing in all circumstances.
        db_config = os.path.join(self.target, "db/DB_CONFIG")
        if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
            and os.path.exists(db_config)):
            self._comment_out_txn_nosync(db_config)

    def _comment_out_txn_nosync(self, db_config):
        """Comment out the 'set_flags DB_TXN_NOSYNC' line of the file
        DB_CONFIG.  The file is left untouched if it has no such
        line."""
        no_sync = 'set_flags DB_TXN_NOSYNC\n'

        config_file = open(db_config, 'r')
        try:
            contents = config_file.readlines()
        finally:
            config_file.close()

        try:
            index = contents.index(no_sync)
        except ValueError:
            # The flag is not set in this file, so there is nothing
            # to turn off.
            return
        contents[index] = '# ' + no_sync

        config_file = open(db_config, 'w')
        try:
            config_file.writelines(contents)
        finally:
            config_file.close()
class StdoutDelegate(SVNRepositoryMirrorDelegate):
    """A delegate that only reports.  It makes no changes to the disk;
    it writes out information to STDOUT about what the
    SVNRepositoryMirror is doing.  Each message says that something is
    being done, when all that is really being done is the saying so.
    Kind of zen, really."""

    def __init__(self, total_revs):
        """TOTAL_REVS is the figure shown after the slash in the
        progress line printed by start_commit()."""
        self.total_revs = total_revs

    def start_commit(self, svn_commit):
        """Report the Subversion revision number of the commit that is
        being started."""
        separator = "=" * 60
        Log().write(LOG_VERBOSE, separator)
        progress = ("Starting Subversion commit %d / %d"
                    % (svn_commit.revnum, self.total_revs))
        Log().write(LOG_NORMAL, progress)

    def mkdir(self, path):
        """Report that directory PATH is being created."""
        Log().write(LOG_VERBOSE, " New Directory", path)

    def add_path(self, s_item):
        """Report that s_item.c_rev.svn_path is being 'added'."""
        svn_path = s_item.c_rev.svn_path
        Log().write(LOG_VERBOSE, " Adding", svn_path)

    def change_path(self, s_item):
        """Report that s_item.c_rev.svn_path is being 'changed'."""
        svn_path = s_item.c_rev.svn_path
        Log().write(LOG_VERBOSE, " Changing", svn_path)

    def delete_path(self, path):
        """Report that PATH is being 'deleted'."""
        Log().write(LOG_VERBOSE, " Deleting", path)

    def copy_path(self, src_path, dest_path, src_revnum):
        """Report, on two lines, that revision SRC_REVNUM of SRC_PATH
        is being 'copied' to DEST_PATH."""
        Log().write(LOG_VERBOSE,
                    " Copying revision", src_revnum, "of", src_path)
        Log().write(LOG_VERBOSE,
                    " to", dest_path)

    def finish(self):
        """Report that the repository has been created."""
        Log().write(LOG_VERBOSE,
                    "Finished creating Subversion repository.")
        Log().write(LOG_QUIET, "Done.")
# This should be a local to pass1,
# but Python 2.0 does not support nested scopes.
OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
def pass1():
    """Pass 1: examine every ',v' file below Ctx().cvsroot.

    Each RCS file found is parsed into a CollectData instance, after
    which the symbol database is written.  The run is aborted with an
    error summary if any file could not be parsed, if a file exists
    both inside and outside an 'Attic' directory, or if no ',v' file
    was found at all."""
    Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
    cd = CollectData()

    def visit_file(baton, dirname, files):
        # os.path.walk() callback: BATON is the CollectData instance,
        # FILES the names of the entries of directory DIRNAME.  This
        # function must not rely on pass1's locals (no nested scopes).
        cd = baton
        attic_len = len(OS_SEP_PLUS_ATTIC)
        for fname in files:
            if fname[-2:] != ',v':
                continue
            cd.found_valid_file = 1
            pathname = os.path.join(dirname, fname)
            if dirname[-attic_len:] == OS_SEP_PLUS_ATTIC:
                # drop the 'Attic' portion from the pathname for the
                # canonical name.
                cd.set_fname(os.path.join(dirname[:-attic_len], fname),
                             pathname)
            else:
                # If this file also exists in the attic, it's a fatal
                # error
                attic_path = os.path.join(dirname, 'Attic', fname)
                if os.path.exists(attic_path):
                    err = ("%s: A CVS repository cannot contain both "
                           "%s and %s"
                           % (error_prefix, pathname, attic_path))
                    sys.stderr.write(err + '\n')
                    cd.fatal_errors.append(err)
                cd.set_fname(pathname, pathname)
            Log().write(LOG_NORMAL, pathname)

            # Close each RCS file explicitly once it has been parsed,
            # so that open handles do not pile up during the walk.
            # try/finally has to wrap try/except on older Pythons.
            rcs_file = None
            try:
                try:
                    rcs_file = open(pathname, 'rb')
                    cvs2svn_rcsparse.parse(rcs_file, cd)
                except (cvs2svn_rcsparse.common.RCSParseError,
                        ValueError, RuntimeError):
                    err = ("%s: '%s' is not a valid ,v file"
                           % (error_prefix, pathname))
                    sys.stderr.write(err + '\n')
                    cd.fatal_errors.append(err)
                except:
                    # Say which file we were in, then let the
                    # exception continue on its way.
                    Log().write(LOG_WARN,
                                "Exception occurred while parsing %s"
                                % pathname)
                    raise
            finally:
                if rcs_file is not None:
                    rcs_file.close()

    os.path.walk(Ctx().cvsroot, visit_file, cd)
    Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

    cd.write_symbol_db()

    if len(cd.fatal_errors) > 0:
        sys.exit("Pass 1 complete.\n" + "=" * 75 + "\n"
                 + "Error summary:\n"
                 + "\n".join(cd.fatal_errors)
                 + "\nExited due to fatal error(s).")

    if cd.found_valid_file is None:
        sys.exit("\nNo RCS files found in your CVS Repository!\n"
                 + "Are you absolutely certain you are pointing cvs2svn\n"
                 + "at a CVS repository?\n"
                 + "\nExited due to fatal error(s).")

    StatsKeeper().reset_c_rev_info()
    StatsKeeper().archive()
    Log().write(LOG_QUIET, "Done")
3994 def pass2():
3995 "Pass 2: clean up the revision information."
3997 symbol_db = SymbolDatabase()
3998 symbol_db.read()
4000 # Convert the list of regexps to a list of strings
4001 excludes = symbol_db.find_excluded_symbols(Ctx().excludes)
4003 error_detected = 0
4005 Log().write(LOG_QUIET, "Checking for blocked exclusions...")
4006 blocked_excludes = symbol_db.find_blocked_excludes(excludes)
4007 if blocked_excludes:
4008 for branch, blockers in blocked_excludes.items():
4009 sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
4010 "excluded because the following symbols depend "
4011 "on it:\n" % (branch))
4012 for blocker in blockers:
4013 sys.stderr.write(" '%s'\n" % (blocker))
4014 sys.stderr.write("\n")
4015 error_detected = 1
4017 Log().write(LOG_QUIET, "Checking for forced tags with commits...")
4018 invalid_forced_tags = [ ]
4019 for forced_tag in Ctx().forced_tags:
4020 if excludes.has_key(forced_tag):
4021 continue
4022 if symbol_db.branch_has_commit(forced_tag):
4023 invalid_forced_tags.append(forced_tag)
4024 if invalid_forced_tags:
4025 sys.stderr.write(error_prefix + ": The following branches cannot be "
4026 "forced to be tags because they have commits:\n")
4027 for tag in invalid_forced_tags:
4028 sys.stderr.write(" '%s'\n" % (tag))
4029 sys.stderr.write("\n")
4030 error_detected = 1
4032 Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
4033 mismatches = symbol_db.find_mismatches(excludes)
4034 def is_not_forced(mismatch):
4035 name = mismatch[0]
4036 return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
4037 mismatches = filter(is_not_forced, mismatches)
4038 if mismatches:
4039 sys.stderr.write(error_prefix + ": The following symbols are tags "
4040 "in some files and branches in others.\nUse "
4041 "--force-tag, --force-branch and/or --exclude to "
4042 "resolve the symbols.\n")
4043 for name, tag_count, branch_count, commit_count in mismatches:
4044 sys.stderr.write(" '%s' is a tag in %d files, a branch in "
4045 "%d files and has commits in %d files.\n"
4046 % (name, tag_count, branch_count, commit_count))
4047 error_detected = 1
4049 # Bail out now if we found errors
4050 if error_detected:
4051 sys.exit(1)
4053 # Create the tags database
4054 tags_db = TagsDatabase(DB_OPEN_NEW)
4055 for tag in symbol_db.tags.keys():
4056 if tag not in Ctx().forced_branches:
4057 tags_db[tag] = None
4058 for tag in Ctx().forced_tags:
4059 tags_db[tag] = None
4061 Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")
4063 # We may have recorded some changes in revisions' timestamp. We need to
4064 # scan for any other files which may have had the same log message and
4065 # occurred at "the same time" and change their timestamps, too.
4067 # read the resync data file
4068 def read_resync(fname):
4069 "Read the .resync file into memory."
4071 ### note that we assume that we can hold the entire resync file in
4072 ### memory. really large repositories with whacky timestamps could
4073 ### bust this assumption. should that ever happen, then it is possible
4074 ### to split the resync file into pieces and make multiple passes,
4075 ### using each piece.
4078 # A digest maps to a sequence of lists which specify a lower and upper
4079 # time bound for matching up the commit. We keep a sequence of these
4080 # because a number of checkins with the same log message (e.g. an empty
4081 # log message) could need to be remapped. We also make them a list because
4082 # we will dynamically expand the lower/upper bound as we find commits
4083 # that fall into a particular msg and time range.
4085 # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
4087 resync = { }
4089 for line in fileinput.FileInput(fname):
4090 t1 = int(line[:8], 16)
4091 digest = line[9:DIGEST_END_IDX]
4092 t2 = int(line[DIGEST_END_IDX+1:], 16)
4093 t1_l = t1 - COMMIT_THRESHOLD/2
4094 t1_u = t1 + COMMIT_THRESHOLD/2
4095 if resync.has_key(digest):
4096 resync[digest].append([t1_l, t1_u, t2])
4097 else:
4098 resync[digest] = [ [t1_l, t1_u, t2] ]
4100 # For each digest, sort the resync items in it in increasing order,
4101 # based on the lower time bound.
4102 digests = resync.keys()
4103 for digest in digests:
4104 (resync[digest]).sort()
4106 return resync
4108 resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))
4110 output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
4111 Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)
4113 # process the revisions file, looking for items to clean up
4114 for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
4115 c_rev = CVSRevision(Ctx(), line[:-1])
4117 # Skip this entire revision if it's on an excluded branch
4118 if excludes.has_key(c_rev.branch_name):
4119 continue
4121 # Remove all references to excluded tags and branches
4122 def not_excluded(symbol, excludes=excludes):
4123 return not excludes.has_key(symbol)
4124 c_rev.branches = filter(not_excluded, c_rev.branches)
4125 c_rev.tags = filter(not_excluded, c_rev.tags)
4127 # Convert all branches that are forced to be tags
4128 for forced_tag in Ctx().forced_tags:
4129 if forced_tag in c_rev.branches:
4130 c_rev.branches.remove(forced_tag)
4131 c_rev.tags.append(forced_tag)
4133 # Convert all tags that are forced to be branches
4134 for forced_branch in Ctx().forced_branches:
4135 if forced_branch in c_rev.tags:
4136 c_rev.tags.remove(forced_branch)
4137 c_rev.branches.append(forced_branch)
4139 # see if this is "near" any of the resync records we
4140 # have recorded for this digest [of the log message].
4141 for record in resync.get(c_rev.digest, []):
4142 if record[0] <= c_rev.timestamp <= record[1]:
4143 # bingo! remap the time on this (record[2] is the new time).
4145 # adjust the time range. we want the COMMIT_THRESHOLD from the
4146 # bounds of the earlier/latest commit in this group.
4147 record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
4148 record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)
4150 # By default this will be the new timestamp
4151 new_timestamp = record[2]
4152 # If the new timestamp is earlier than that of our previous revision
4153 if record[2] < c_rev.prev_timestamp:
4154 desc = ("%s: Attempt to set timestamp of revision %s on file %s"
4155 + " to time %s, which is before previous the time of"
4156 + " revision %s (%s):")
4157 Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
4158 c_rev.cvs_path, record[2],
4159 c_rev.prev_rev, c_rev.prev_timestamp))
4160 # If resyncing our rev to c_rev.prev_timestamp + 1 will place
4161 # the timestamp of c_rev within COMMIT_THRESHOLD of the
4162 # attempted sync time, then sync back to c_rev.prev_timestamp
4163 # + 1...
4164 if (c_rev.prev_timestamp - record[2]) < COMMIT_THRESHOLD:
4165 new_timestamp = c_rev.prev_timestamp + 1
4166 Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
4167 new_timestamp))
4168 # ...otherwise, make no change
4169 else:
4170 new_timestamp = c_rev.timestamp
4171 Log().write(LOG_WARN, "%s: Timestamp left untouched" %
4172 warning_prefix)
4174 msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
4175 % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
4176 record[2] - c_rev.timestamp)
4177 Log().write(LOG_VERBOSE, msg)
4179 c_rev.timestamp = new_timestamp
4181 # stop looking for hits
4182 break
4184 output.write(str(c_rev) + "\n")
4185 Log().write(LOG_QUIET, "Done")
def pass3():
  """Sort the cleaned-up CVS revisions file into the sorted revisions file.

  The sorted output is registered with Cleanup() against pass5."""
  unsorted_name = DATAFILE + CLEAN_REVS_SUFFIX
  sorted_name = DATAFILE + SORTED_REVS_SUFFIX

  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  sort_file(temp(unsorted_name), temp(sorted_name))
  Cleanup().register(temp(sorted_name), pass5)
  Log().write(LOG_QUIET, "Done")
def pass4():
  """Copy the sorted CVS revisions from the flat file into a database.

  Every revision read from the sorted revisions file is logged to a
  new CVSRevisionDatabase and recorded with the StatsKeeper.  Unless
  this is a trunk-only conversion, each revision is also logged to a
  new LastSymbolicNameDatabase, which contains the last CVSRevision
  that is a source for each tag or branch.
  """
  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  revisions_db = CVSRevisionDatabase(DB_OPEN_NEW)
  if Ctx().trunk_only:
    # A do-nothing stand-in, so that the loop below does not have to
    # test Ctx().trunk_only for every revision.
    class DummyLSNDB:
      def log_revision(*args):
        pass
      def create_database(*args):
        pass
    symbol_sources_db = DummyLSNDB()
  else:
    Log().write(LOG_QUIET,
                "and finding last CVS revisions for all symbolic names...")
    symbol_sources_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for rev_line in sorted_revs:
    # Drop the trailing newline before parsing the record.
    revision = CVSRevision(Ctx(), rev_line[:-1])
    revisions_db.log_revision(revision)
    symbol_sources_db.log_revision(revision)
    StatsKeeper().record_c_rev(revision)

  symbol_sources_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  Each revision in the sorted revisions file is fed to a
  CVSRevisionAggregator; in a trunk-only conversion, revisions that
  have a branch name are left out.  CVSCommit._commit also calls
  SymbolingsLogger to register CVSRevisions that represent an opening
  or closing for a path on a branch or tag.  See SymbolingsLogger for
  more details.
  """
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  commit_builder = CVSRevisionAggregator()
  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for rev_line in sorted_revs:
    # Drop the trailing newline before parsing the record.
    revision = CVSRevision(Ctx(), rev_line[:-1])
    if Ctx().trunk_only and revision.branch_name is not None:
      continue
    commit_builder.process_revision(revision)
  commit_builder.flush()

  StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass6():
  """Sort the symbol openings/closings file (skipped when trunk-only).

  The sorted output is registered with Cleanup() against pass8."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  if Ctx().trunk_only:
    # No symbols are converted, so there is nothing to sort.
    Log().write(LOG_QUIET, "Done")
    return

  sort_file(temp(SYMBOL_OPENINGS_CLOSINGS),
            temp(SYMBOL_OPENINGS_CLOSINGS_SORTED))
  Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), pass8)
  Log().write(LOG_QUIET, "Done")
def pass7():
  """Record, for each symbolic name, where its entries start in the
  sorted openings/closings file.  Nothing is generated for a
  trunk-only conversion."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      # readline() rather than file iteration: tell() must report the
      # offset of the line that is about to be read.
      while 1:
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only the symbol name is needed here; the unpacking still
        # insists on the three-field line format.
        sym, _svn_revnum, _cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      # Release the file handle even if a line fails to parse.
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Replay the stored SVN commits, in revision order, into an
  SVNRepositoryMirror and its output delegates."""
  mirror = SVNRepositoryMirror()
  commits = PersistenceManager(DB_OPEN_READ)

  # With a target we feed a repository, otherwise a dumpfile.
  if Ctx().target:
    delegate_class = RepositoryDelegate
    announcement = "Starting Subversion Repository."
  else:
    delegate_class = DumpfileDelegate
    announcement = "Starting Subversion Dumpfile."

  # A dry run announces the output but attaches no writing delegate.
  if not Ctx().dry_run:
    mirror.add_delegate(delegate_class())
  Log().write(LOG_QUIET, announcement)

  mirror.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  revnum = 2 # Repository initialization is 1.
  svn_commit = commits.get_svn_commit(revnum)
  while svn_commit:
    mirror.commit(svn_commit)
    revnum += 1
    svn_commit = commits.get_svn_commit(revnum)

  mirror.finish()
# The conversion passes in execution order; pass number N (1-based) is
# _passes[N - 1].
_passes = [pass1, pass2, pass3, pass4,
           pass5, pass6, pass7, pass8]
class Ctx:
  """Session state for this run of cvs2svn.  For example, run-time
  options are stored here.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.
  """

  __shared_state = { }

  def __init__(self):
    # Every instance uses the one class-level dictionary as its
    # attribute dictionary, so all instances see the same attributes.
    state = self.__dict__ = self.__shared_state
    if not state:
      # First instantiation: fill in the defaults.
      state.update({
        'cvsroot': None,
        'target': None,
        'dumpfile': DUMPFILE,
        'tmpdir': '.',
        'verbose': 0,
        'quiet': 0,
        'prune': 1,
        'existing_svnrepos': 0,
        'dump_only': 0,
        'dry_run': 0,
        'trunk_only': 0,
        'trunk_base': "trunk",
        'tags_base': "tags",
        'branches_base': "branches",
        'encoding': "ascii",
        'mime_types_file': None,
        'mime_mapper': None,
        'no_default_eol': 0,
        'eol_from_mime_type': 0,
        'keywords_off': 0,
        'use_cvs': None,
        'svnadmin': "svnadmin",
        'username': None,
        'print_help': 0,
        'skip_cleanup': 0,
        'cvs_revnums': 0,
        'bdb_txn_nosync': 0,
        'fs_type': None,
        'forced_branches': [],
        'forced_tags': [],
        'excludes': [],
        'symbol_transforms': [],
        })
class MimeMapper:
  """A class that provides mappings from file names to MIME types.
  Note that we should really be using Python's 'mimetypes' module.
  See http://cvs2svn.tigris.org/servlets/ReadMsg?list=dev&msgNo=266
  for more."""

  def __init__(self):
    # Maps a file extension (without the dot), or a whole base name for
    # files that have no extension, to a MIME type string.
    self.mappings = { }

  def set_mime_types_file(self, mime_types_file):
    """Add the mappings found in MIME_TYPES_FILE to self.mappings.

    Each useful line names a MIME type followed by one or more
    extensions.  Lines starting with '#' and lines with fewer than two
    fields are ignored.  If an extension is seen again with a different
    type, a warning is written to stderr and the later type wins."""
    # Use a private FileInput instance, as the rest of this file does:
    # fileinput.input() works on module-global state and refuses to
    # start while another input() is still active.
    mime_types = fileinput.FileInput(mime_types_file)
    try:
      for line in mime_types:
        if line.startswith("#"):
          continue

        # format of a line is something like
        # text/plain c h cpp
        extensions = line.split()
        if len(extensions) < 2:
          continue
        mime_type = extensions.pop(0)
        for ext in extensions:
          # Defaulting to mime_type makes an unseen extension compare
          # equal, so only a real conflict produces the warning.
          known_type = self.mappings.get(ext, mime_type)
          if known_type != mime_type:
            sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n" \
                             % (warning_prefix, ext, known_type, mime_type))
          self.mappings[ext] = mime_type
    finally:
      mime_types.close()

  def get_type_from_filename(self, filename):
    """Return the MIME type mapped to FILENAME, or None if there is
    no mapping for it."""
    basename, extension = os.path.splitext(os.path.basename(filename))

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename
    return self.mappings.get(extension)
4413 def convert(start_pass, end_pass):
4414 "Convert a CVS repository to an SVN repository."
4416 cleanup = Cleanup()
4417 times = [ None ] * (end_pass + 1)
4418 times[start_pass - 1] = time.time()
4419 StatsKeeper().set_start_time(time.time())
4420 for i in range(start_pass - 1, end_pass):
4421 Log().write(LOG_QUIET, '----- pass %d -----' % (i + 1))
4422 _passes[i]()
4423 times[i + 1] = time.time()
4424 StatsKeeper().log_duration_for_pass(times[i + 1] - times[i], i + 1)
4425 # Dispose of items in Ctx() not intended to live past the end of the pass
4426 # (Identified by exactly one leading underscore)
4427 for attr in dir(Ctx()):
4428 if (len(attr) > 2 and attr[0] == '_' and attr[1] != '_'
4429 and not attr[:6] == "_Ctx__"):
4430 delattr(Ctx(), attr)
4431 if not Ctx().skip_cleanup:
4432 cleanup.cleanup(_passes[i])
4433 StatsKeeper().set_end_time(time.time())
4435 Log().write(LOG_QUIET, StatsKeeper())
4436 if end_pass < 4:
4437 Log().write(LOG_QUIET, '(These are unaltered CVS repository stats and do not\n'
4438 + ' reflect tags or branches excluded via --exclude)\n')
4439 print StatsKeeper().timings()
4442 def usage():
4443 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
4444 % os.path.basename(sys.argv[0])
4445 print ' --help, -h print this usage message and exit with success'
4446 print ' --version print the version number'
4447 print ' -q quiet'
4448 print ' -v verbose'
4449 print ' -s PATH path for SVN repos'
4450 print ' -p START[:END] start at pass START, end at pass END of %d' % len(_passes)
4451 print ' If only START is given, run only pass START'
4452 print ' (implicitly enables --skip-cleanup)'
4453 print ' --existing-svnrepos load into existing SVN repository'
4454 print ' --dumpfile=PATH name of intermediate svn dumpfile'
4455 print ' --tmpdir=PATH directory to use for tmp data (default to cwd)'
4456 print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
4457 print ' --dry-run do not create a repository or a dumpfile;'
4458 print ' just print what would happen.'
4459 print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
4460 print ' (only use this if having problems with RCS)'
4461 print ' --svnadmin=PATH path to the svnadmin program'
4462 print ' --trunk-only convert only trunk commits, not tags nor branches'
4463 print ' --trunk=PATH path for trunk (default: %s)' \
4464 % Ctx().trunk_base
4465 print ' --branches=PATH path for branches (default: %s)' \
4466 % Ctx().branches_base
4467 print ' --tags=PATH path for tags (default: %s)' \
4468 % Ctx().tags_base
4469 print ' --no-prune don\'t prune empty directories'
4470 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
4471 print ' --encoding=ENC encoding of log messages in CVS repos (default: %s)' \
4472 % Ctx().encoding
4473 print ' --force-branch=NAME force NAME to be a branch'
4474 print ' --force-tag=NAME force NAME to be a tag'
4475 print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
4476 print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
4477 print ' use Python regexp and reference syntax respectively'
4478 print ' --username=NAME username for cvs2svn-synthesized commits'
4479 print ' --skip-cleanup prevent the deletion of intermediate files'
4480 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
4481 print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
4482 print ' --cvs-revnums record CVS revision numbers as file properties'
4483 print ' --mime-types=FILE specify an apache-style mime.types file for\n' \
4484 ' setting svn:mime-type'
4485 print ' --eol-from-mime-type set svn:eol-style by mime type (only with --mime-types)'
4486 print ' --no-default-eol don\'t set svn:eol-style by CVS defaults'
4487 print ' --keywords-off don\'t set svn:keywords on any files (by default,'
4488 print ' cvs2svn sets svn:keywords on non-binary files to'
4489 print ' "%s")' % SVN_KEYWORDS_VALUE
4491 def main():
4492 # Convenience var, so we don't have to keep instantiating this Borg.
4493 ctx = Ctx()
4495 profiling = None
4496 start_pass = 1
4497 end_pass = len(_passes)
4499 try:
4500 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
4501 [ "help", "create", "trunk=",
4502 "username=", "existing-svnrepos",
4503 "branches=", "tags=", "encoding=",
4504 "force-branch=", "force-tag=", "exclude=",
4505 "use-cvs", "mime-types=",
4506 "eol-from-mime-type", "no-default-eol",
4507 "trunk-only", "no-prune", "dry-run",
4508 "dump-only", "dumpfile=", "tmpdir=",
4509 "svnadmin=", "skip-cleanup", "cvs-revnums",
4510 "bdb-txn-nosync", "fs-type=",
4511 "version", "profile",
4512 "keywords-off", "symbol-transform="])
4513 except getopt.GetoptError, e:
4514 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
4515 usage()
4516 sys.exit(1)
4518 for opt, value in opts:
4519 if opt == '--version':
4520 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
4521 sys.exit(0)
4522 elif opt == '-p':
4523 # Don't cleanup if we're doing incrementals.
4524 ctx.skip_cleanup = 1
4525 if value.find(':') > 0:
4526 start_pass, end_pass = map(int, value.split(':'))
4527 else:
4528 end_pass = start_pass = int(value)
4529 if start_pass > len(_passes) or start_pass < 1:
4530 print '%s: illegal value (%d) for starting pass. '\
4531 'must be 1 through %d.' % (error_prefix, int(start_pass),
4532 len(_passes))
4533 sys.exit(1)
4534 if end_pass < start_pass or end_pass > len(_passes):
4535 print '%s: illegal value (%d) for ending pass. ' \
4536 'must be %d through %d.' % (error_prefix, int(end_pass),
4537 int(start_pass), len(_passes))
4538 sys.exit(1)
4539 elif (opt == '--help') or (opt == '-h'):
4540 ctx.print_help = 1
4541 elif opt == '-v':
4542 Log().log_level = LOG_VERBOSE
4543 ctx.verbose = 1
4544 elif opt == '-q':
4545 Log().log_level = LOG_QUIET
4546 ctx.quiet = 1
4547 elif opt == '-s':
4548 ctx.target = value
4549 elif opt == '--existing-svnrepos':
4550 ctx.existing_svnrepos = 1
4551 elif opt == '--dumpfile':
4552 ctx.dumpfile = value
4553 elif opt == '--tmpdir':
4554 ctx.tmpdir = value
4555 elif opt == '--use-cvs':
4556 ctx.use_cvs = 1
4557 elif opt == '--svnadmin':
4558 ctx.svnadmin = value
4559 elif opt == '--trunk-only':
4560 ctx.trunk_only = 1
4561 elif opt == '--trunk':
4562 if not value:
4563 sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
4564 ctx.trunk_base = value
4565 elif opt == '--branches':
4566 if not value:
4567 sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
4568 ctx.branches_base = value
4569 elif opt == '--tags':
4570 if not value:
4571 sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
4572 ctx.tags_base = value
4573 elif opt == '--no-prune':
4574 ctx.prune = None
4575 elif opt == '--dump-only':
4576 ctx.dump_only = 1
4577 elif opt == '--dry-run':
4578 ctx.dry_run = 1
4579 elif opt == '--encoding':
4580 ctx.encoding = value
4581 elif opt == '--force-branch':
4582 ctx.forced_branches.append(value)
4583 elif opt == '--force-tag':
4584 ctx.forced_tags.append(value)
4585 elif opt == '--exclude':
4586 try:
4587 ctx.excludes.append(re.compile('^' + value + '$'))
4588 except re.error, e:
4589 sys.exit(error_prefix + ": '%s' is not a valid regexp.\n" % (value))
4590 elif opt == '--mime-types':
4591 ctx.mime_types_file = value
4592 elif opt == '--eol-from-mime-type':
4593 ctx.eol_from_mime_type = 1
4594 elif opt == '--no-default-eol':
4595 ctx.no_default_eol = 1
4596 elif opt == '--keywords-off':
4597 ctx.keywords_off = 1
4598 elif opt == '--username':
4599 ctx.username = value
4600 elif opt == '--skip-cleanup':
4601 ctx.skip_cleanup = 1
4602 elif opt == '--cvs-revnums':
4603 ctx.cvs_revnums = 1
4604 elif opt == '--bdb-txn-nosync':
4605 ctx.bdb_txn_nosync = 1
4606 elif opt == '--fs-type':
4607 ctx.fs_type = value
4608 elif opt == '--create':
4609 sys.stderr.write(warning_prefix +
4610 ': The behaviour produced by the --create option is now the '
4611 'default,\nand passing the option is deprecated.\n')
4612 elif opt == '--profile':
4613 profiling = 1
4614 elif opt == '--symbol-transform':
4615 ctx.symbol_transforms.append(value.split(":"))
4617 if ctx.print_help:
4618 usage()
4619 sys.exit(0)
4621 # Consistency check for options and arguments.
4622 if len(args) == 0:
4623 usage()
4624 sys.exit(1)
4626 if len(args) > 1:
4627 sys.stderr.write(error_prefix +
4628 ": must pass only one CVS repository.\n")
4629 usage()
4630 sys.exit(1)
4632 ctx.cvsroot = args[0]
4634 if not os.path.isdir(ctx.cvsroot):
4635 sys.stderr.write(error_prefix +
4636 ": the given CVS repository path '%s' is not an "
4637 "existing directory.\n" % ctx.cvsroot)
4638 sys.exit(1)
4640 if ctx.use_cvs:
4641 # Ascend above the specified root if necessary, to find the cvs_repository
4642 # (a directory containing a CVSROOT directory) and the cvs_module (the
4643 # path of the conversion root within the cvs repository)
4644 # NB: cvs_module must be seperated by '/' *not* by os.sep .
4645 ctx.cvs_repository = os.path.abspath(ctx.cvsroot)
4646 prev_cvs_repository = None
4647 ctx.cvs_module = ""
4648 while prev_cvs_repository != ctx.cvs_repository:
4649 if os.path.isdir(os.path.join(ctx.cvs_repository, 'CVSROOT')):
4650 break
4651 prev_cvs_repository = ctx.cvs_repository
4652 ctx.cvs_repository, module_component = os.path.split(ctx.cvs_repository)
4653 ctx.cvs_module = module_component + "/" + ctx.cvs_module
4654 else:
4655 # Hit the root (of the drive, on Windows) without finding a CVSROOT dir.
4656 sys.stderr.write(error_prefix +
4657 ": the path '%s' is not a CVS repository, nor a path " \
4658 "within a CVS repository. A CVS repository contains " \
4659 "a CVSROOT directory within its root directory.\n" \
4660 % ctx.cvsroot)
4661 sys.exit(1)
4662 os.environ['CVSROOT'] = ctx.cvs_repository
4664 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
4665 sys.stderr.write(error_prefix +
4666 ": must pass one of '-s' or '--dump-only'.\n")
4667 sys.exit(1)
4669 def not_both(opt1val, opt1name, opt2val, opt2name):
4670 if opt1val and opt2val:
4671 sys.stderr.write(error_prefix + ": cannot pass both '%s' and '%s'.\n" \
4672 % (opt1name, opt2name))
4673 sys.exit(1)
4675 not_both(ctx.target, '-s',
4676 ctx.dump_only, '--dump-only')
4678 not_both(ctx.dump_only, '--dump-only',
4679 ctx.existing_svnrepos, '--existing-svnrepos')
4681 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
4682 ctx.existing_svnrepos, '--existing-svnrepos')
4684 not_both(ctx.dump_only, '--dump-only',
4685 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
4687 not_both(ctx.quiet, '-q',
4688 ctx.verbose, '-v')
4690 not_both(ctx.fs_type, '--fs-type',
4691 ctx.existing_svnrepos, '--existing-svnrepos')
4693 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
4694 sys.stderr.write(error_prefix +
4695 ": cannot pass --bdb-txn-nosync with --fs-type=%s.\n" \
4696 % ctx.fs_type)
4697 sys.exit(1)
4699 if ((string.find(ctx.trunk_base, '/') > -1)
4700 or (string.find(ctx.tags_base, '/') > -1)
4701 or (string.find(ctx.branches_base, '/') > -1)):
4702 sys.stderr.write("%s: cannot pass multicomponent path to "
4703 "--trunk, --tags, or --branches yet.\n"
4704 " See http://cvs2svn.tigris.org/issues/show_bug.cgi?"
4705 "id=7 for details.\n" % error_prefix)
4706 sys.exit(1)
4708 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
4709 sys.stderr.write(error_prefix +
4710 ": the svn-repos-path '%s' is not an "
4711 "existing directory.\n" % ctx.target)
4712 sys.exit(1)
4714 if not ctx.dump_only and not ctx.existing_svnrepos \
4715 and (not ctx.dry_run) and os.path.exists(ctx.target):
4716 sys.stderr.write(error_prefix +
4717 ": the svn-repos-path '%s' exists.\nRemove it, or pass "
4718 "'--existing-svnrepos'.\n" % ctx.target)
4719 sys.exit(1)
4721 if ctx.mime_types_file:
4722 ctx.mime_mapper = MimeMapper()
4723 ctx.mime_mapper.set_mime_types_file(ctx.mime_types_file)
4725 # Make sure the tmp directory exists. Note that we don't check if
4726 # it's empty -- we want to be able to use, for example, "." to hold
4727 # tempfiles. But if we *did* want check if it were empty, we'd do
4728 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
4729 if not os.path.exists(ctx.tmpdir):
4730 os.mkdir(ctx.tmpdir)
4731 elif not os.path.isdir(ctx.tmpdir):
4732 sys.stderr.write(error_prefix +
4733 ": cvs2svn tried to use '%s' for temporary files, but that path\n"
4734 " exists and is not a directory. Please make it be a directory,\n"
4735 " or specify some other directory for temporary files.\n" \
4736 % ctx.tmpdir)
4737 sys.exit(1)
4739 if ctx.use_cvs:
4740 def cvs_ok():
4741 pipe = Popen3('cvs %s --version' % Ctx().cvs_global_arguments, True)
4742 pipe.tochild.close()
4743 pipe.fromchild.read()
4744 errmsg = pipe.childerr.read()
4745 status = pipe.wait()
4746 ok = len(errmsg) == 0 and status == 0
4747 return (ok, status, errmsg)
4749 ctx.cvs_global_arguments = "-q -R"
4750 ok, cvs_exitstatus, cvs_errmsg = cvs_ok()
4751 if not ok:
4752 ctx.cvs_global_arguments = "-q"
4753 ok, cvs_exitstatus, cvs_errmsg = cvs_ok()
4755 if not ok:
4756 sys.stderr.write(error_prefix +
4757 ": error executing CVS: status %s, error output:\n" \
4758 % (cvs_exitstatus) + cvs_errmsg)
4760 # But do lock the tmpdir, to avoid process clash.
4761 try:
4762 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
4763 except OSError, e:
4764 if e.errno == errno.EACCES:
4765 sys.stderr.write(error_prefix + ": Permission denied:"
4766 + " No write access to output directory.\n")
4767 sys.exit(1)
4768 if e.errno == errno.EEXIST:
4769 sys.stderr.write(error_prefix +
4770 ": cvs2svn is using directory '%s' for temporary files, but\n"
4771 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
4772 " cvs2svn process is currently using '%s' as its temporary\n"
4773 " workspace. If you are certain that is not the case,\n"
4774 " then remove the '%s/cvs2svn.lock' subdirectory.\n" \
4775 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir))
4776 sys.exit(1)
4777 raise
4778 try:
4779 if profiling:
4780 import hotshot
4781 prof = hotshot.Profile('cvs2svn.hotshot')
4782 prof.runcall(convert, start_pass, end_pass)
4783 prof.close()
4784 else:
4785 convert(start_pass, end_pass)
4786 finally:
4787 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
4788 except: pass
# Run the converter only when this file is executed as a script.
if __name__ == '__main__':
  main()