Fix bug reported by Arthur Rudolph <arudolph@post.harvard.edu>.
[cvs2svn.git] / cvs2svn
blob73e652dc228474259893dd1643e8362629d4884b
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import string
30 import getopt
31 import stat
32 import string
33 import md5
34 import marshal
35 import errno
36 import popen2
# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = "WARNING"
error_prefix = "ERROR"

# Make sure this Python is recent enough.  The message used to start
# with a stray, unbalanced single quote; it now begins directly with
# the error prefix like every other diagnostic in this file.
if sys.hexversion < 0x2000000:
  sys.stderr.write("%s: Python 2.0 or higher required, "
                   "see www.python.org.\n" % error_prefix)
  sys.exit(1)
49 # Pretend we have true booleans on older python versions
50 try:
51 True
52 except:
53 True = 1
54 False = 0
# Fallback for platforms whose popen2 module has no Popen3 class: a
# minimal, incomplete stand-in built on top of popen2.popen3().
try:
  Popen3 = popen2.Popen3
except AttributeError:
  class Popen3:
    """Bare-bones substitute for popen2.Popen3.

    The three pipes returned by popen2.popen3() are exposed as the
    fromchild, tochild and childerr attributes."""

    def __init__(self, cmd, capturestderr):
      """Start CMD (a string, or a sequence of words that is joined
      with spaces).  CAPTURESTDERR is accepted but not consulted."""
      if not (type(cmd) == str):
        cmd = " ".join(cmd)
      streams = popen2.popen3(cmd, mode='b')
      self.fromchild, self.tochild, self.childerr = streams

    def wait(self):
      """Close the pipes in order and return the first true value that
      a close() call yields, or else the result of the last close()."""
      status = None
      for stream in (self.fromchild, self.tochild, self.childerr):
        status = stream.close()
        if status:
          return status
      return status
# DBM module selection

# 1. If we have bsddb3, it is probably newer than bsddb.  Fake bsddb = bsddb3,
#    so that the dbhash module used by anydbm will use bsddb3.
try:
  import bsddb3
except ImportError:
  pass
else:
  sys.modules['bsddb'] = sys.modules['bsddb3']

# 2. These DBM modules are not good for cvs2svn.  The complaint goes to
#    stderr, with the shared error prefix, like the other diagnostics
#    emitted while this module is being set up.
import anydbm
if anydbm._defaultmod.__name__ in ('dumbdbm', 'dbm'):
  sys.stderr.write(
    error_prefix
    + ': your installation of Python does not contain a suitable\n'
    ' DBM module. This script cannot continue.\n'
    ' to solve: see http://python.org/doc/current/lib/module-anydbm.html\n'
    ' for details.\n')
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then prefer gdbm instead.
#    Unfortunately, gdbm appears not to be trouble free, either.
if hasattr(anydbm._defaultmod, 'bsddb') \
    and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    # No gdbm to fall back on: keep the old bsddb, but warn about it.
    sys.stderr.write(warning_prefix +
        ': The version of the bsddb module found '
        'on your computer has been reported to malfunction on some datasets, '
        'causing KeyError exceptions. You may wish to upgrade your Python to '
        'version 2.3 or later.\n')
  else:
    anydbm._defaultmod = gdbm
# Patterns recognising the different shapes of RCS revision numbers.
# Raw strings spell exactly the same patterns without doubled
# backslashes.
trunk_rev = re.compile(r'^[0-9]+\.[0-9]+$')
branch_tag = re.compile(r'^[0-9.]+\.0\.[0-9]+$')
vendor_tag = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+$')

# This really only matches standard '1.1.1.*'-style vendor revisions.
# One could conceivably have a file whose default branch is 1.1.3 or
# whatever, or was that at some point in time, with vendor revisions
# 1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now (which
# is the only time this regexp gets used), we'd have no basis for
# assuming that the non-standard vendor branch had ever been the
# default branch anyway, so we don't want this to match them anyway.
#
# Note that the repetition sits outside the second group, so group(2)
# holds only the final digit of the last revision component.
vendor_revision = re.compile(r'^(1\.1\.1)\.([0-9])+$')
119 # If this run's output is a repository, then (in the tmpdir) we use
120 # a dumpfile of this name for repository loads.
122 # If this run's output is a dumpfile, then this is default name of
123 # that dumpfile, but in the current directory (unless the user has
124 # specified a dumpfile path, of course, in which case it will be
125 # wherever the user said).
126 DUMPFILE = 'cvs2svn-dump'
128 # This file appears with different suffixes at different stages of
129 # processing. CVS revisions are cleaned and sorted here, for commit
130 # grouping. See design-notes.txt for details.
131 DATAFILE = 'cvs2svn-data'
133 # This file contains a marshalled copy of all the statistics that we
134 # gather throughout the various runs of cvs2svn. The data is stored as a
135 # marshalled dictionary.
136 STATISTICS_FILE = 'cvs2svn-statistics'
138 # This text file contains records (1 per line) that describe svn
139 # filesystem paths that are the opening and closing source revisions
140 # for copies to tags and branches. The format is as follows:
142 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
144 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
145 # SVN_REVNUM are the primary and secondary sorting criteria for
146 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
147 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
148 # A sorted version of the above file.
149 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
151 # This file is a temporary file for storing symbolic_name -> closing
152 # CVSRevision until the end of our pass where we can look up the
153 # corresponding SVNRevNum for the closing revs and write these out to
154 # the SYMBOL_OPENINGS_CLOSINGS.
155 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
157 # Skeleton version of an svn filesystem.
158 # (These supersede and will eventually replace the two above.)
159 # See class SVNRepositoryMirror for how these work.
160 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
161 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
163 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
164 # SYMBOL_OPENINGS_CLOSINGS_SORTED
165 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
167 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
168 # the CVSRevision is the last such that is a source for those symbolic
169 # names. For example, if branch B's number is 1.3.0.2 in this CVS
170 # file, and this file's 1.3 is the latest (by date) revision among
171 # *all* CVS files that is a source for branch B, then the
172 # CVSRevision.unique_key() corresponding to this file at 1.3 would
173 # list at least B in its list.
174 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
176 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
177 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
178 ### the s-revs data in this database.
179 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
181 # Lists all symbolic names that are tags. Keys are strings (symbolic
182 # names), values are ignorable.
183 TAGS_DB = 'cvs2svn-tags.db'
185 # A list of all tags. Each line consists of the tag name and the number
186 # of files in which it exists, separated by a space.
187 TAGS_LIST = 'cvs2svn-tags.txt'
189 # A list of all branches. The file is stored as a plain text file
190 # to make it easy to look at in an editor. Each line contains the
191 # branch name, the number of files where the branch is created, the
192 # commit count, and a list of tags and branches that are defined on
193 # revisions in the branch.
194 BRANCHES_LIST = 'cvs2svn-branches.txt'
196 # These two databases provide a bidirectional mapping between
197 # CVSRevision.unique_key()s and Subversion revision numbers.
199 # The first maps CVSRevision.unique_key() to a number; the values are
200 # not unique.
202 # The second maps a number to a list of CVSRevision.unique_key()s.
203 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
204 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
206 # This database maps svn_revnums to tuples of (symbolic_name, date).
208 # The svn_revnums are the revision numbers of all non-primary
209 # SVNCommits. No primary SVNCommit has a key in this database.
211 # The date is stored for all commits in this database.
213 # For commits that fill symbolic names, the symbolic_name is stored.
214 # For commits that are default branch syncs, the symbolic_name is None.
215 SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'
217 # This database maps svn_revnums of a default branch synchronization
218 # commit to the svn_revnum of the primary SVNCommit that motivated it.
220 # (NOTE: Secondary commits that fill branches and tags also have a
221 # motivating commit, but we do not record it because it is (currently)
222 # not needed for anything.)
224 # This mapping is used when generating the log message for the commit
225 # that synchronizes the default branch with trunk.
226 MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'
228 # How many bytes to read at a time from a pipe. 128 kiB should be
229 # large enough to be efficient without wasting too much memory.
230 PIPE_READ_SIZE = 128 * 1024
232 # Record the default RCS branches, if any, for CVS filepaths.
234 # The keys are CVS filepaths, relative to the top of the repository
235 # and with the ",v" stripped off, so they match the cvs paths used in
236 # Commit.commit(). The values are vendor branch revisions, such as
237 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
238 # represents the highest vendor branch revision thought to have ever
239 # been head of the default branch.
241 # The reason we record a specific vendor revision, rather than a
242 # default branch number, is that there are two cases to handle:
244 # One case is simple. The RCS file lists a default branch explicitly
245 # in its header, such as '1.1.1'. In this case, we know that every
246 # revision on the vendor branch is to be treated as head of trunk at
247 # that point in time.
249 # But there's also a degenerate case. The RCS file does not currently
250 # have a default branch, yet we can deduce that for some period in the
251 # past it probably *did* have one. For example, the file has vendor
252 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
253 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
254 # case, we should record 1.1.1.96 as the last vendor revision to have
255 # been the head of the default branch.
256 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
258 # Records the author and log message for each changeset.
259 # The keys are author+log digests, the same kind used to identify
260 # unique revisions in the .revs, etc files. Each value is a tuple
261 # of two elements: '(author logmessage)'.
262 METADATA_DB = "cvs2svn-metadata.db"
264 REVS_SUFFIX = '.revs'
265 CLEAN_REVS_SUFFIX = '.c-revs'
266 SORTED_REVS_SUFFIX = '.s-revs'
267 RESYNC_SUFFIX = '.resync'
269 SVN_INVALID_REVNUM = -1
271 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
273 # Things that can happen to a file.
274 OP_NOOP = '-'
275 OP_ADD = 'A'
276 OP_DELETE = 'D'
277 OP_CHANGE = 'C'
279 # A deltatext either does or doesn't represent some change.
280 DELTATEXT_NONEMPTY = 'N'
281 DELTATEXT_EMPTY = 'E'
283 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
285 # Constants used in SYMBOL_OPENINGS_CLOSINGS
286 OPENING = 'O'
287 CLOSING = 'C'
def temp(basename):
  """Return the path of BASENAME inside the directory Ctx().tmpdir.
  This is a convenience function to save horizontal space in source."""
  tmpdir = Ctx().tmpdir
  return os.path.join(tmpdir, basename)
294 # Since the unofficial set also includes [/\] we need to translate those
295 # into ones that don't conflict with Subversion limitations.
296 def _clean_symbolic_name(name):
297 """Return symbolic name NAME, translating characters that Subversion
298 does not allow in a pathname."""
299 name = name.replace('/','++')
300 name = name.replace('\\','--')
301 return name
303 def _path_join(*components):
304 """Join two or more pathname COMPONENTS, inserting '/' as needed.
305 Empty component are skipped."""
306 return string.join(filter(None, components), '/')
def run_command(command):
  """Run COMMAND through os.system(); if the result is non-zero, exit
  the program with a message naming the command."""
  status = os.system(command)
  if not status:
    return
  sys.exit('Command failed: "%s"' % command)
def relative_name(cvsroot, fname):
  """Return FNAME relative to CVSROOT, using '/' as the separator.

  FNAME must start with CVSROOT.  One os.sep directly after the
  CVSROOT prefix is dropped, and every remaining os.sep is turned into
  '/'.  If FNAME equals CVSROOT the result is the empty string.

  If FNAME does not start with CVSROOT, an error is written to stderr
  and the program exits with status 1."""
  l = len(cvsroot)
  if fname[:l] == cvsroot:
    rest = fname[l:]
    # Slice instead of indexing: REST is empty when FNAME == CVSROOT,
    # and indexing it would raise IndexError.
    if rest[:1] == os.sep:
      rest = rest[1:]
    return rest.replace(os.sep, '/')
  sys.stderr.write("%s: relative_name('%s', '%s'): fname is not a sub-path of"
                   " cvsroot\n" % (error_prefix, cvsroot, fname))
  sys.exit(1)
def get_co_pipe(c_rev):
  """Return a (command string, pipe) pair for reading a CVS revision.
  C_REV is a CVSRevision; the pipe's fromchild stream yields the text
  of that revision.  Depending on Ctx().use_cvs the command is either
  'cvs ... co' or the RCS 'co' program.  The pipe's input side is
  closed before it is returned."""
  ctx = Ctx()
  if not ctx.use_cvs:
    rcs_file = escape_shell_arg(c_rev.rcs_path())
    pipe_cmd = 'co -q -x,v -p%s %s' % (c_rev.rev, rcs_file)
  else:
    module_path = escape_shell_arg(ctx.cvs_module + c_rev.cvs_path)
    pipe_cmd = 'cvs %s co -r%s -p %s' % \
               (ctx.cvs_global_arguments, c_rev.rev, module_path)
  pipe = Popen3(pipe_cmd, True)
  # Nothing is ever sent to the child.
  pipe.tochild.close()
  return pipe_cmd, pipe
def generate_ignores(c_rev):
  """Return the list of ignore patterns found in the text of C_REV.

  The revision text is read through get_co_pipe().  If the command
  reports a non-zero exit status the program exits with a message that
  includes the command, the status and the command's error output."""
  # Read in props
  pipe_cmd, pipe = get_co_pipe(c_rev)
  chunks = [ ]
  while 1:
    chunk = pipe.fromchild.read(PIPE_READ_SIZE)
    if not chunk:
      break
    chunks.append(chunk)
  pipe.fromchild.close()
  error_output = pipe.childerr.read()
  exit_status = pipe.wait()
  if exit_status:
    sys.exit("%s: The command '%s' failed with exit status: %s\n"
             "and the following output:\n"
             "%s" % (error_prefix, pipe_cmd, exit_status, error_output))

  # Tweak props: patterns are separated by any run of whitespace, so
  # split() yields exactly the non-empty patterns, in order.
  ignore_vals = [ ]
  for pattern in "".join(chunks).split():
    if pattern == '!':
      # Reset the list if we encounter a '!'
      # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
      ignore_vals = [ ]
    else:
      ignore_vals.append(pattern)
  return ignore_vals
369 # Return a string that has not been returned by gen_key() before.
370 gen_key_base = 0L
def gen_key():
  """Return the current value of the module-level counter gen_key_base
  as a lowercase hexadecimal string, then advance the counter by one,
  so that successive calls return different strings."""
  global gen_key_base
  current = gen_key_base
  gen_key_base = current + 1
  return '%x' % current
# escape_shell_arg(str) wraps STR in quotes for use on a command line;
# the quoting rules are chosen once, according to the platform.
if sys.platform != "win32":
  def escape_shell_arg(str):
    """Return STR in single quotes, with each embedded single quote
    rewritten as '\\'' (close quote, escaped quote, reopen quote)."""
    return "'" + str.replace("'", "'\\''") + "'"
else:
  def escape_shell_arg(str):
    """Return STR in double quotes, with each embedded double quote
    rewritten as "^"" ."""
    return '"' + str.replace('"', '"^""') + '"'
def format_date(date):
  """Return an svn-compatible date string for DATE (seconds since epoch).
  DATE is interpreted in UTC."""
  # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
  utc_fields = time.gmtime(date)
  return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc_fields)
def sort_file(infile, outfile):
  """Sort the lines of INFILE into OUTFILE using the external 'sort'
  program, with Ctx().tmpdir as sort's temporary directory.

  The command is run with LC_ALL set to 'C'; the previous value of
  LC_ALL (or its absence) is restored afterwards, even if the command
  fails.  The paths are shell-escaped so that names containing spaces
  or shell metacharacters are passed through intact."""
  # sort the log files

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so temporarily set it to 'C'.
  lc_all_tmp = os.environ.get('LC_ALL')
  os.environ['LC_ALL'] = 'C'
  try:
    # The -T option to sort has a nice side effect.  The Win32 sort is
    # case insensitive and cannot be used, and since it does not
    # understand the -T option and dies if we try to use it, there is
    # no risk that we use that sort by accident.
    run_command('sort -T %s %s > %s'
                % (escape_shell_arg(Ctx().tmpdir),
                   escape_shell_arg(infile),
                   escape_shell_arg(outfile)))
  finally:
    # run_command() exits the program on failure; put the environment
    # back regardless, so exit handlers see the caller's locale.
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp
410 def print_node_tree(tree, root_node, indent_depth=0):
411 """For debugging purposes. Prints all nodes in TREE that are
412 rooted at ROOT_NODE. INDENT_DEPTH is merely for purposes of
413 debugging with the print statement in this function."""
414 if not indent_depth:
415 print "TREE", "=" * 75
416 print "TREE:", " " * (indent_depth * 2), root_node, tree[root_node]
417 for key, value in tree[root_node].items():
418 if key[0] == '/': #Skip flags
419 continue
420 print_node_tree(tree, value, (indent_depth + 1))
def match_regexp_list(regexp_list, string):
  """Return 1 if STRING matches any of the compiled regexps in
  REGEXP_LIST, else return None.  The regexps are tried in order and
  the search stops at the first match."""
  index = 0
  while index < len(regexp_list):
    if regexp_list[index].match(string):
      return 1
    index = index + 1
  return None
# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2
class Log:
  """A Simple logging facility.  Each line will be timestamped if
  self.use_timestamps is TRUE.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
  __shared_state = {}
  def __init__(self):
    # Every instance shares one attribute dictionary; only the first
    # instance ever created fills in the defaults below.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    self.log_level = LOG_NORMAL
    # Set this to true if you want to see timestamps on each line output.
    self.use_timestamps = None
    self.logger = sys.stdout

  def _timestamp(self):
    """Output a detailed timestamp at the beginning of each line output."""
    # %M is minutes; the format used to say %m, which printed the month
    # number in the minutes position.
    self.logger.write(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))

  def write(self, log_level, *args):
    """This is the public method to use for writing to a file.  Only
    messages whose LOG_LEVEL is <= self.log_level will be printed.  If
    there are multiple ARGS, they will be separated by a space."""
    if log_level > self.log_level:
      return
    if self.use_timestamps:
      self._timestamp()
    self.logger.write(' '.join(map(str,args)) + "\n")
    # Ensure that log output doesn't get out-of-order with respect to
    # stderr output.
    self.logger.flush()
class Cleanup:
  """This singleton class manages any files created by cvs2svn.  When
  you first create a file, call Cleanup.register, passing the
  filename, and the last pass that you need the file.  After the end
  of that pass, your file will be cleaned up after running an optional
  callback.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

  __shared_state = {}
  def __init__(self):
    # All instances share one attribute dictionary; only the first
    # instance ever created sets up the (empty) bookkeeping tables.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    # pass -> { filename: 1 } of files to delete after that pass.
    self._log = {}
    # filename -> callback to run just before deleting that file.
    self._callbacks = {}

  def register(self, file, which_pass, callback=None):
    """Register FILE for cleanup at the end of WHICH_PASS, running
    function CALLBACK prior to removal.  Registering a given FILE is
    idempotent; you may register as many times as you wish, but it
    will only be cleaned up once.

    Note that if a file is registered multiple times, only the first
    callback registered for that file will be called at cleanup
    time.  Also note that if you register a database file you must
    close the database before cleanup, e.g. using a callback."""
    files_for_pass = self._log.get(which_pass)
    if files_for_pass is None:
      files_for_pass = {}
      self._log[which_pass] = files_for_pass
    files_for_pass[file] = 1
    # Stored callbacks are always true values, so a None lookup result
    # means that no callback has been recorded for FILE yet.
    if callback and self._callbacks.get(file) is None:
      self._callbacks[file] = callback

  def cleanup(self, which_pass):
    """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
    files_for_pass = self._log.get(which_pass)
    if files_for_pass is None:
      return
    for file in files_for_pass.keys():
      Log().write(LOG_VERBOSE, "Deleting", file)
      callback = self._callbacks.get(file)
      if callback is not None:
        callback()
      os.unlink(file)
# Always use these constants for opening databases.
DB_OPEN_READ = 'r'
DB_OPEN_NEW = 'n'

class Database:
  """A wrapper for anydbm that uses the marshal module to store items
  as strings: values are marshalled on the way in and unmarshalled on
  the way out."""

  def __init__(self, filename, mode):
    """Open the database FILENAME in MODE (see DB_OPEN_READ and
    DB_OPEN_NEW)."""
    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb - but *not* anydbm in general - the database
    # consists of one file with the name we specify, rather than several
    # based on that name).
    if mode == DB_OPEN_NEW:
      if anydbm._defaultmod.__name__ == 'dbhash':
        if os.path.isfile(filename):
          os.unlink(filename)
        mode = 'c'

    self.db = anydbm.open(filename, mode)

  def has_key(self, key):
    """Return whether KEY is present in the underlying database."""
    return self.db.has_key(key)

  def __getitem__(self, key):
    """Return the unmarshalled value stored under KEY."""
    stored = self.db[key]
    return marshal.loads(stored)

  def __setitem__(self, key, value):
    """Store the marshalled form of VALUE under KEY."""
    self.db[key] = marshal.dumps(value)

  def __delitem__(self, key):
    """Remove KEY from the underlying database."""
    del self.db[key]

  def get(self, key, default):
    """Return the value stored under KEY, or DEFAULT if KEY is absent."""
    if not self.has_key(key):
      return default
    return self[key]
552 class StatsKeeper:
553 __shared_state = { }
554 def __init__(self):
555 self.__dict__ = self.__shared_state
556 if self.__dict__:
557 return
558 self.filename = temp(STATISTICS_FILE)
559 Cleanup().register(self.filename, pass8)
560 # This can get kinda large, so we don't store it in our data dict.
561 self.repos_files = { }
563 if os.path.exists(self.filename):
564 self.unarchive()
565 else:
566 self.data = { 'cvs_revs_count' : 0,
567 'tags': { },
568 'branches' : { },
569 'repos_size' : 0,
570 'repos_file_count' : 0,
571 'svn_rev_count' : None,
572 'first_rev_date' : 1L<<32,
573 'last_rev_date' : 0,
574 'pass_timings' : { },
575 'start_time' : 0,
576 'end_time' : 0,
579 def log_duration_for_pass(self, duration, pass_num):
580 self.data['pass_timings'][pass_num] = duration
582 def set_start_time(self, start):
583 self.data['start_time'] = start
585 def set_end_time(self, end):
586 self.data['end_time'] = end
588 def _bump_item(self, key, amount=1):
589 self.data[key] = self.data[key] + amount
591 def reset_c_rev_info(self):
592 self.data['cvs_revs_count'] = 0
593 self.data['tags'] = { }
594 self.data['branches'] = { }
596 def record_c_rev(self, c_rev):
597 self._bump_item('cvs_revs_count')
599 for tag in c_rev.tags:
600 self.data['tags'][tag] = None
601 for branch in c_rev.branches:
602 self.data['branches'][branch] = None
604 if c_rev.timestamp < self.data['first_rev_date']:
605 self.data['first_rev_date'] = c_rev.timestamp
607 if c_rev.timestamp > self.data['last_rev_date']:
608 self.data['last_rev_date'] = c_rev.timestamp
610 # Only add the size if this is the first time we see the file.
611 if not self.repos_files.has_key(c_rev.fname):
612 self._bump_item('repos_size', c_rev.file_size)
613 self.repos_files[c_rev.fname] = None
615 self.data['repos_file_count'] = len(self.repos_files)
617 def set_svn_rev_count(self, count):
618 self.data['svn_rev_count'] = count
620 def svn_rev_count(self):
621 return self.data['svn_rev_count']
623 def archive(self):
624 open(self.filename, 'w').write(marshal.dumps(self.data))
626 def unarchive(self):
627 self.data = marshal.loads(open(self.filename, 'r').read())
629 def __str__(self):
630 svn_revs_str = ""
631 if self.data['svn_rev_count'] is not None:
632 svn_revs_str = ('Total SVN Commits: %10s\n'
633 % self.data['svn_rev_count'])
635 return ('\n' \
636 'cvs2svn Statistics:\n' \
637 '------------------\n' \
638 'Total CVS Files: %10i\n' \
639 'Total CVS Revisions: %10i\n' \
640 'Total Unique Tags: %10i\n' \
641 'Total Unique Branches: %10i\n' \
642 'CVS Repos Size in KB: %10i\n' \
643 '%s' \
644 'First Revision Date: %s\n' \
645 'Last Revision Date: %s\n' \
646 '------------------' \
647 % (self.data['repos_file_count'],
648 self.data['cvs_revs_count'],
649 len(self.data['tags']),
650 len(self.data['branches']),
651 (self.data['repos_size'] / 1024),
652 svn_revs_str,
653 time.ctime(self.data['first_rev_date']),
654 time.ctime(self.data['last_rev_date']),
657 def timings(self):
658 passes = self.data['pass_timings'].keys()
659 passes.sort()
660 str = 'Timings:\n------------------\n'
662 def desc(val):
663 if val == 1: return "second"
664 return "seconds"
666 for pass_num in passes:
667 duration = int(self.data['pass_timings'][pass_num])
668 p_str = ('pass %d:%6d %s\n'
669 % (pass_num, duration, desc(duration)))
670 str = str + p_str
672 total = int(self.data['end_time'] - self.data['start_time'])
673 str = str + ('total: %6d %s' % (total, desc(total)))
674 return str
class LastSymbolicNameDatabase:
  """ Passing every CVSRevision in s-revs to this class will result in
  a Database whose key is the last CVS Revision a symbolicname was
  seen in, and whose value is a list of all symbolicnames that were
  last seen in that revision."""
  def __init__(self, mode):
    """Open the underlying database in MODE and register it for
    cleanup."""
    # symbolic name -> unique_key() of the last revision seen so far
    # that carries the name.
    self.symbols = {}
    self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
    Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

  # Once we've gone through all the revs,
  # symbols.keys() will be a list of all tags and branches, and
  # their corresponding values will be a key into the last CVS revision
  # that they were used in.
  def log_revision(self, c_rev):
    """Remember C_REV as the latest revision for each of its tags and,
    unless C_REV is a deletion, for each of its branches."""
    # Gather last CVS Revision for symbolic name info and tag info
    for tag in c_rev.tags:
      self.symbols[tag] = c_rev.unique_key()
    # Compare by value: an identity test on strings only works by
    # accident of how the interpreter happens to share string objects.
    if c_rev.op != OP_DELETE:
      for branch in c_rev.branches:
        self.symbols[branch] = c_rev.unique_key()

  # Creates an inversion of symbols above--a dictionary of lists (key
  # = CVS rev unique_key: val = list of symbols that close in that
  # rev.
  def create_database(self):
    """Write the inverted mapping (revision key -> list of symbolic
    names) into the database."""
    for sym, rev_unique_key in self.symbols.items():
      if self.symbol_revs_db.has_key(rev_unique_key):
        # Values come back as copies, so the extended list has to be
        # stored again.
        ary = self.symbol_revs_db[rev_unique_key]
        ary.append(sym)
        self.symbol_revs_db[rev_unique_key] = ary
      else:
        self.symbol_revs_db[rev_unique_key] = [sym]
class CVSRevisionDatabase:
  """A Database to store CVSRevision objects and retrieve them by their
  unique_key()."""

  def __init__(self, mode):
    """Initialize an instance, opening database in MODE (like the MODE
    argument to Database or anydbm.open()).  The database file is
    registered for cleanup."""
    db_path = temp(CVS_REVS_DB)
    self.cvs_revs_db = Database(db_path, mode)
    Cleanup().register(db_path, pass8)

  def log_revision(self, c_rev):
    """Add C_REV, a CVSRevision, to the database.  It is stored in its
    string form under its unique_key()."""
    serialized = str(c_rev)
    self.cvs_revs_db[c_rev.unique_key()] = serialized

  def get_revision(self, unique_key):
    """Return the CVSRevision stored under UNIQUE_KEY, rebuilt from its
    stored string form."""
    ctx = Ctx()
    return CVSRevision(ctx, self.cvs_revs_db[unique_key])
class TagsDatabase(Database):
  """A Database to store which symbolic names are tags.
  Each key is a tag name.
  The value has no meaning, and should be set to None."""
  def __init__(self, mode):
    """Open the tags database in MODE and register its file for
    cleanup."""
    db_path = temp(TAGS_DB)
    Database.__init__(self, db_path, mode)
    Cleanup().register(db_path, pass8)
740 class CVSRevision:
741 def __init__(self, ctx, *args):
742 """Initialize a new CVSRevision with Ctx object CTX, and ARGS.
744 If CTX is None, the following members and methods of the
745 instantiated CVSRevision class object will be unavailable (or
746 simply will not work correctly, if at all):
747 cvs_path
748 svn_path
749 svn_trunk_path
750 is_default_branch_revision()
752 (Note that this class treats CTX as const, because the caller
753 likely passed in a Borg instance of a Ctx. The reason this class
754 takes CTX as as a parameter, instead of just instantiating a Ctx
755 itself, is that this class should be usable outside cvs2svn.)
757 If there is one argument in ARGS, it is a string, in the format of
758 a line from a revs file. Do *not* include a trailing newline.
760 If there are multiple ARGS, there must be 16 of them,
761 comprising a parsed revs line:
762 timestamp --> (int) date stamp for this cvs revision
763 digest --> (string) digest of author+logmsg
764 prev_timestamp --> (int) date stamp for the previous cvs revision
765 op --> (char) OP_ADD, OP_CHANGE, or OP_DELETE
766 prev_rev --> (string or None) previous CVS rev, e.g., "1.2"
767 rev --> (string) this CVS rev, e.g., "1.3"
768 next_rev --> (string or None) next CVS rev, e.g., "1.4"
769 file_in_attic --> (char or None) true if RCS file is in Attic
770 file_executable --> (char or None) true if RCS file has exec bit set.
771 file_size --> (int) size of the RCS file
772 deltatext_code --> (char) 'N' if non-empty deltatext, else 'E'
773 mode --> (string or None) "kkv", "kb", etc.
774 branch_name --> (string or None) branch on which this rev occurred
775 tags --> (list of strings) all tags on this revision
776 branches --> (list of strings) all branches rooted in this rev
777 fname --> (string) relative path of file in CVS repos
779 The two forms of initialization are equivalent."""
781 self._ctx = ctx
782 if len(args) == 16:
783 (self.timestamp, self.digest, self.prev_timestamp, self.op,
784 self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
785 self.file_executable, self.file_size, self.deltatext_code, self.fname,
786 self.mode, self.branch_name, self.tags, self.branches) = args
787 elif len(args) == 1:
788 data = args[0].split(' ', 14)
789 self.timestamp = int(data[0], 16)
790 self.digest = data[1]
791 if data[2] == "*":
792 self.prev_timestamp = 0
793 else:
794 self.prev_timestamp = int(data[2])
795 self.op = data[3]
796 self.prev_rev = data[4]
797 if self.prev_rev == "*":
798 self.prev_rev = None
799 self.rev = data[5]
800 self.next_rev = data[6]
801 if self.next_rev == "*":
802 self.next_rev = None
803 self.file_in_attic = data[7]
804 if self.file_in_attic == "*":
805 self.file_in_attic = None
806 self.file_executable = data[8]
807 if self.file_executable == "*":
808 self.file_executable = None
809 self.file_size = int(data[9])
810 self.deltatext_code = data[10]
811 self.mode = data[11]
812 if self.mode == "*":
813 self.mode = None
814 self.branch_name = data[12]
815 if self.branch_name == "*":
816 self.branch_name = None
817 ntags = int(data[13])
818 tags = data[14].split(' ', ntags + 1)
819 nbranches = int(tags[ntags])
820 branches = tags[ntags + 1].split(' ', nbranches)
821 self.fname = branches[nbranches]
822 self.tags = tags[:ntags]
823 self.branches = branches[:nbranches]
824 else:
825 raise TypeError, 'CVSRevision() takes 2 or 16 arguments (%d given)' % \
826 (len(args) + 1)
827 if ctx is not None:
828 self.cvs_path = relative_name(self._ctx.cvsroot, self.fname[:-2])
829 self.svn_path = self._make_path(self.cvs_path, self.branch_name)
830 self.svn_trunk_path = self._make_path(self.cvs_path)
832 # The 'primary key' of a CVS Revision is the revision number + the
833 # filename. To provide a unique key (say, for a dict), we just glom
834 # them together in a string. By passing in self.prev_rev or
835 # self.next_rev, you can get the unique key for their respective
836 # CVSRevisions.
837 def unique_key(self, revnum=None):
838 if revnum is None:
839 revnum = self.rev
840 return revnum + "/" + self.fname
842 def __str__(self):
843 return ('%08lx %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s' % (
844 self.timestamp, self.digest, self.prev_timestamp or "*", self.op,
845 (self.prev_rev or "*"), self.rev, (self.next_rev or "*"),
846 (self.file_in_attic or "*"), (self.file_executable or "*"),
847 self.file_size,
848 self.deltatext_code, (self.mode or "*"), (self.branch_name or "*"),
849 len(self.tags), self.tags and " " or "", " ".join(self.tags),
850 len(self.branches), self.branches and " " or "", " ".join(self.branches),
851 self.fname, ))
853 # Returns true if this CVSRevision is the opening CVSRevision for
854 # NAME (for this RCS file).
def opens_symbolic_name(self, name):
  """Return 1 if this CVSRevision is the opening CVSRevision for
  NAME (for this RCS file), else 0.

  A tag listed in SELF.tags is always opened here.  A branch listed
  in SELF.branches is opened here unless SELF.op is OP_DELETE."""
  if name in self.tags:
    return 1
  # If this c_rev opens a branch and our op is OP_DELETE, then the
  # file that this c_rev belongs to was created on the branch, so for
  # all intents and purposes this c_rev is *technically* not an
  # opening.  See Issue #62 for more information.
  if name in self.branches and self.op != OP_DELETE:
    return 1
  return 0
def is_default_branch_revision(self):
  """Return 1 if SELF.rev of SELF.cvs_path is a default branch
  revision according to DEFAULT_BRANCHES_DB (see the conditions
  documented there), else return None.

  The database maps a cvs path to a revision number.  SELF.rev
  qualifies when it lies on the same branch as that revision and its
  last component is not greater than the recorded one."""
  default_branches = self._ctx._default_branches_db
  if not default_branches.has_key(self.cvs_path):
    return None

  recorded = default_branches[self.cvs_path]
  recorded_dot = recorded.rindex(".")
  our_dot = self.rev.rindex(".")

  # Everything before the last dot names the branch; the part after
  # it is the position of the revision on that branch.
  recorded_branch = recorded[:recorded_dot]
  our_branch = self.rev[:our_dot]
  recorded_position = int(recorded[recorded_dot + 1:])
  our_position = int(self.rev[our_dot + 1:])

  if recorded_branch != our_branch or our_position > recorded_position:
    return None
  return 1
def _make_path(self, path, branch_name = None):
  """Return the trunk path or branch path for PATH.

  With a true BRANCH_NAME the result is SELF._ctx.branches_base, the
  cleaned branch name and PATH joined by '/'.  Otherwise it is
  SELF._ctx.trunk_base and PATH joined by '/'.

  If PATH is None, return None."""
  # For a while, we treated each top-level subdir of the CVS
  # repository as a "project root" and interpolated the appropriate
  # genealogy (trunk|tag|branch) in according to the official
  # recommended layout.  For example, the path '/foo/bar/baz.c' on
  # branch 'Rel2' would become
  #
  #   /foo/branches/Rel2/bar/baz.c
  #
  # and on trunk it would become
  #
  #   /foo/trunk/bar/baz.c
  #
  # However, we went back to the older and simpler method of just
  # prepending the genealogy to the front, instead of interpolating.
  # So now we produce:
  #
  #   /branches/Rel2/foo/bar/baz.c
  #   /trunk/foo/bar/baz.c
  #
  # Why?  Well, Jack Repenning pointed out that this way is much
  # friendlier to "anonymously rooted subtrees" (that's a tree where
  # the name of the top level dir doesn't matter, the point is that if
  # you cd into it and, say, run 'make', something good will happen).
  # By interpolating, we made it impossible to point cvs2svn at some
  # subdir in the CVS repository and convert it as a project, because
  # we'd treat every subdir underneath it as an independent project
  # root, which is probably not what the user wanted.
  #
  # Also, see Blair Zajac's post
  #
  #   http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=38965
  #
  # and the surrounding thread, for why what people really want is a
  # way of specifying an in-repository prefix path, not interpolation.
  if path is None:
    return None

  if not branch_name:
    return self._ctx.trunk_base + '/' + path

  cleaned_name = _clean_symbolic_name(branch_name)
  return self._ctx.branches_base + '/' + cleaned_name + '/' + path
def rcs_path(self):
  """Return the actual filesystem path to the RCS file of this
  CVSRevision.

  SELF.fname has no 'Attic' component.  When SELF.file_in_attic is
  set, 'Attic' is inserted in front of the last path component."""
  if self.file_in_attic is not None:
    directory, leaf = os.path.split(self.fname)
    return os.path.join(directory, 'Attic', leaf)
  return self.fname
def filename(self):
  "Return the last path component of self.fname, minus the ',v'"
  leaf = os.path.basename(self.fname)
  # Drop the two-character ',v' suffix.
  return leaf[:-2]
class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""

  def __init__(self):
    # A hash that maps tag names to the number of times the tag was
    # registered with register_tag_creation().
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    if name not in self.tags:
      self.tags[name] = 0
    self.tags[name] += 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    if name not in self.branches:
      self.branches[name] = [ 0, 0, { } ]
    return self.branches[name]

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Returns a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags.keys():
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches.keys():
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """Return the blockers of BRANCH that are not in the hash EXCLUDES.

    Blockers are only reported when BRANCH itself is in EXCLUDES; for
    any other branch the result is empty.  The returned hash is used
    as a set so the values are not used."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so
    the values are not used."""
    blocked_branches = { }
    for branch in self.branches.keys():
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches.keys():
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files.

    The tags file has one 'NAME COUNT' line per tag.  The branches
    file has one 'NAME CREATE_COUNT COMMIT_COUNT [BLOCKER ...]' line
    per branch.  Both files are closed again before returning."""
    f = open(temp(TAGS_LIST))
    try:
      for line in f.readlines():
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST))
    try:
      for line in f.readlines():
        words = line.split()
        self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
        for blocker in words[3:]:
          self.branches[words[0]][2][blocker] = None
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files.

    Writes the format that read() parses and registers both files
    for cleanup.  Each file is closed, and therefore flushed, even if
    writing fails part way through."""
    f = open(temp(TAGS_LIST), "w")
    try:
      Cleanup().register(temp(TAGS_LIST), pass2)
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    try:
      Cleanup().register(temp(BRANCHES_LIST), pass2)
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()
class CollectData(cvs2svn_rcsparse.Sink):
  """Sink for cvs2svn_rcsparse that gathers the data of each RCS file.

  For every file fed through it, an instance records the revisions,
  symbols and default branch of the file, writes one line per
  CVSRevision to the revs temp file, logs adjusted timestamps to the
  resync temp file, and accumulates symbol statistics in a
  SymbolDatabase."""

  def __init__(self):
    """Open the output files and databases and register them for cleanup."""
    self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
    Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
    self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
    Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
    self.default_branches_db = Database(temp(DEFAULT_BRANCHES_DB), DB_OPEN_NEW)
    Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
    Cleanup().register(temp(METADATA_DB), pass8)
    self.fatal_errors = []
    self.num_files = 0
    self.symbol_db = SymbolDatabase()

    # 1 if we've collected data for at least one file, None otherwise.
    self.found_valid_file = None

    # See set_fname() for initializations of other variables.

  def set_fname(self, canonical_name, filename):
    """Prepare to receive data for FILENAME.  FILENAME is the absolute
    filesystem path to the file in question, and CANONICAL_NAME is
    FILENAME with the 'Attic' component removed (if the file is indeed
    in the Attic).

    All per-file state is reset here."""
    self.fname = canonical_name

    # We calculate and save some file metadata here, where we can do
    # it only once per file, instead of waiting until later where we
    # would have to do the same calculations once per CVS *revision*.

    self.rel_name = relative_name(Ctx().cvsroot, self.fname)[:-2]

    # If the paths are not the same, then that means that the
    # canonical_name has had the 'Attic' component stripped out.
    self.file_in_attic = None
    if not canonical_name == filename:
      self.file_in_attic = 1

    file_stat = os.stat(filename)
    # The size of our file in bytes
    self.file_size = file_stat[stat.ST_SIZE]

    # Whether or not the executable bit is set.
    self.file_executable = None
    if file_stat[stat.ST_MODE] & stat.S_IXUSR:
      self.file_executable = 1

    # revision -> [timestamp, author, old-timestamp]
    self.rev_data = { }

    # Maps revision number (key) to the revision number of the
    # previous revision along this line of development.
    #
    # For the first revision R on a branch, we consider the revision
    # from which R sprouted to be the 'previous'.
    #
    # Note that this revision can't be determined arithmetically (due
    # to cvsadmin -o, which is why this is necessary).
    self.prev_rev = { }

    # This dict is essentially self.prev_rev with the values mapped in
    # the other direction, so following key -> value will yield you
    # the next revision number
    self.next_rev = { }

    # Track the state of each revision so that in set_revision_info,
    # we can determine if our op is an add/change/delete.  We can do
    # this because in set_revision_info, we'll have all of the
    # revisions for a file at our fingertips, and we need to examine
    # the state of our prev_rev to determine if we're an add or a
    # change--without the state of the prev_rev, we are unable to
    # distinguish between an add and a change.
    self.rev_state = { }

    # Hash mapping branch numbers, like '1.7.2', to branch names,
    # like 'Release_1_0_dev'.
    self.branch_names = { }

    # RCS flags (used for keyword expansion).
    self.mode = None

    # Hash mapping revision numbers, like '1.7', to lists of names
    # indicating which branches sprout from that revision, like
    # ['Release_1_0_dev', 'experimental_driver', ...].
    self.branchlist = { }

    # Like self.branchlist, but the values are lists of tag names that
    # apply to the key revision.
    self.taglist = { }

    # If set, this is an RCS branch number -- rcsparse calls this the
    # "principal branch", but CVS and RCS refer to it as the "default
    # branch", so that's what we call it, even though the rcsparse API
    # setter method is still 'set_principal_branch'.
    self.default_branch = None

    # If the RCS file doesn't have a default branch anymore, but does
    # have vendor revisions, then we make an educated guess that those
    # revisions *were* the head of the default branch up until the
    # commit of 1.2, at which point the file's default branch became
    # trunk.  This records the date at which 1.2 was committed.
    self.first_non_vendor_revision_date = None

    # A list of all symbols defined for the current file.  Used to
    # prevent multiple definitions of a symbol, something which can
    # easily happen when --symbol-transform is used.
    self.defined_symbols = [ ]

  def set_principal_branch(self, branch):
    """Record BRANCH as the default branch of the current file."""
    self.default_branch = branch

  def set_expansion(self, mode):
    """Record MODE, the RCS keyword expansion flags of the current file."""
    self.mode = mode

  def set_branch_name(self, branch_number, name):
    """Record that BRANCH_NUMBER is the branch number for branch NAME,
    and that NAME sprouts from BRANCH_NUMBER.
    BRANCH_NUMBER is an RCS branch number with an odd number of components,
    for example '1.7.2' (never '1.7.0.2').

    If BRANCH_NUMBER already has a name, write a warning to stderr
    and ignore NAME."""
    if branch_number not in self.branch_names:
      self.branch_names[branch_number] = name
      # The branchlist is keyed on the revision number from which the
      # branch sprouts, so strip off the odd final component.
      sprout_rev = branch_number[:branch_number.rfind(".")]
      self.branchlist.setdefault(sprout_rev, []).append(name)
      self.symbol_db.register_branch_creation(name)
    else:
      sys.stderr.write("%s: in '%s':\n"
                       "   branch '%s' already has name '%s',\n"
                       "   cannot also have name '%s', ignoring the latter\n"
                       % (warning_prefix, self.fname, branch_number,
                          self.branch_names[branch_number], name))

  def rev_to_branch_name(self, revision):
    """Return the name of the branch on which REVISION lies.
    REVISION is a non-branch revision number with an even number of
    components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
    For the convenience of callers, REVISION can also be a trunk
    revision such as '1.2', in which case just return None."""
    if trunk_rev.match(revision):
      return None
    return self.branch_names.get(revision[:revision.rindex(".")])

  def add_cvs_branch(self, revision, branch_name):
    """Record the root revision and branch revision for BRANCH_NAME,
    based on REVISION.  REVISION is a CVS branch number having an even
    number of components where the second-to-last is '0'.  For
    example, if it's '1.7.0.2', then record that BRANCH_NAME sprouts
    from 1.7 and has branch number 1.7.2."""
    last_dot = revision.rfind(".")
    branch_rev = revision[:last_dot]
    last2_dot = branch_rev.rfind(".")
    # Drop the '.0' component: '1.7.0' + '.2' becomes '1.7' + '.2'.
    branch_rev = branch_rev[:last2_dot] + revision[last_dot:]
    self.set_branch_name(branch_rev, branch_name)

  def define_tag(self, name, revision):
    """Record a bidirectional mapping between symbolic NAME and REVISION.
    REVISION is an unprocessed revision number from the RCS file's
    header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
    This function will determine what kind of symbolic name it is by
    inspection, and record it in the right places.

    NAME is first passed through every symbol transform.  A second
    definition of the same (transformed) name in one file is reported
    on stderr and appended to SELF.fatal_errors."""
    for (pattern, replacement) in Ctx().symbol_transforms:
      newname = re.sub(pattern, replacement, name)
      if newname != name:
        Log().write(LOG_WARN, "   symbol '%s' transformed to '%s'"
                    % (name, newname))
        name = newname
    if name in self.defined_symbols:
      err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
                % (error_prefix, name, self.fname)
      sys.stderr.write(err + "\n")
      self.fatal_errors.append(err)
    self.defined_symbols.append(name)
    if branch_tag.match(revision):
      self.add_cvs_branch(revision, name)
    elif vendor_tag.match(revision):
      self.set_branch_name(revision, name)
    else:
      self.taglist.setdefault(revision, []).append(name)
      self.symbol_db.register_tag_creation(name)

  def define_revision(self, revision, timestamp, author, state,
                      branches, next):
    """Record the admin data of REVISION of the current file.

    Stores the state and [timestamp, author, old-timestamp] of
    REVISION, links it into SELF.prev_rev and SELF.next_rev, updates
    the default branches db when REVISION is (or is guessed to be) on
    the default branch, and registers a commit on the branch of
    REVISION when it is not a trunk revision."""

    # Record the state of our revision for later calculations
    self.rev_state[revision] = state

    # store the rev_data as a list in case we have to jigger the timestamp
    self.rev_data[revision] = [int(timestamp), author, None]

    # When on trunk, the RCS 'next' revision number points to what
    # humans might consider to be the 'previous' revision number.  For
    # example, 1.3's RCS 'next' is 1.2.
    #
    # However, on a branch, the RCS 'next' revision number really does
    # point to what humans would consider to be the 'next' revision
    # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
    #
    # In other words, in RCS, 'next' always means "where to find the next
    # deltatext that you need this revision to retrieve".
    #
    # That said, we don't *want* RCS's behavior here, so we determine
    # whether we're on trunk or a branch and set self.prev_rev
    # accordingly.
    #
    # One last thing.  Note that if REVISION is a branch revision,
    # instead of mapping REVISION to NEXT, we instead map NEXT to
    # REVISION.  Since we loop over all revisions in the file before
    # doing anything with the data we gather here, this 'reverse
    # assignment' effectively does the following:
    #
    # 1. Gives us no 'prev' value for REVISION (in this
    # iteration... it may have been set in a previous iteration)
    #
    # 2. Sets the 'prev' value for the revision with number NEXT to
    # REVISION.  So when we come around to the branch revision whose
    # revision value is NEXT, its 'prev' and 'prev_rev' are already
    # set.
    if trunk_rev.match(revision):
      self.prev_rev[revision] = next
      self.next_rev[next] = revision
    elif next:
      self.prev_rev[next] = revision
      self.next_rev[revision] = next

    for b in branches:
      self.prev_rev[b] = revision

    # Ratchet up the highest vendor head revision, if necessary.
    if self.default_branch:
      default_branch_root = self.default_branch + "."
      if ((revision.find(default_branch_root) == 0)
          and (default_branch_root.count('.') == revision.count('.'))):
        # This revision is on the default branch, so record that it is
        # the new highest default branch head revision.
        self.default_branches_db[self.rel_name] = revision
    else:
      # No default branch, so make an educated guess.
      if revision == '1.2':
        # This is probably the time when the file stopped having a
        # default branch, so make a note of it.
        self.first_non_vendor_revision_date = timestamp
      else:
        m = vendor_revision.match(revision)
        if m and ((not self.first_non_vendor_revision_date)
                  or (timestamp < self.first_non_vendor_revision_date)):
          # We're looking at a vendor revision, and it wasn't
          # committed after this file lost its default branch, so bump
          # the maximum trunk vendor revision in the permanent record.
          self.default_branches_db[self.rel_name] = revision

    if not trunk_rev.match(revision):
      # Check for unlabeled branches, record them.  We tried to collect
      # all branch names when we parsed the symbolic name header
      # earlier, of course, but that didn't catch unlabeled branches.
      # If a branch is unlabeled, this is our first encounter with it,
      # so we have to record its data now.
      branch_number = revision[:revision.rindex(".")]
      if branch_number not in self.branch_names:
        branch_name = "unlabeled-" + branch_number
        self.set_branch_name(branch_number, branch_name)

      # Register the commit on this non-trunk branch
      branch_name = self.branch_names[branch_number]
      self.symbol_db.register_branch_commit(branch_name)

  def tree_completed(self):
    "The revision tree has been parsed. Analyze it for consistency."

    # Our algorithm depends upon the timestamps on the revisions occuring
    # monotonically over time. That is, we want to see rev 1.34 occur in
    # time before rev 1.35. If we inserted 1.35 *first* (due to the time-
    # sorting), and then tried to insert 1.34, we'd be screwed.

    # to perform the analysis, we'll simply visit all of the 'previous'
    # links that we have recorded and validate that the timestamp on the
    # previous revision is before the specified revision

    # if we have to resync some nodes, then we restart the scan. just keep
    # looping as long as we need to restart.
    while 1:
      for current, prev in self.prev_rev.items():
        if not prev:
          # no previous revision exists (i.e. the initial revision)
          continue
        t_c = self.rev_data[current][0]
        t_p = self.rev_data[prev][0]
        if t_p >= t_c:
          # the previous revision occurred later than the current revision.
          # shove the previous revision back in time (and any before it that
          # may need to shift).

          # We sync backwards and not forwards because any given CVS
          # Revision has only one previous revision.  However, a CVS
          # Revision can *be* a previous revision for many other
          # revisions (e.g., a revision that is the source of multiple
          # branches).  This becomes relevant when we do the secondary
          # synchronization in pass 2--we can make certain that we
          # don't resync a revision earlier than its previous
          # revision, but it would be non-trivial to make sure that we
          # don't resync revision R *after* any revisions that have R
          # as a previous revision.
          while t_p >= t_c:
            self.rev_data[prev][0] = t_c - 1    # new timestamp
            self.rev_data[prev][2] = t_p        # old timestamp
            delta = t_c - 1 - t_p
            msg =  "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
                  % (self.rel_name,
                     prev, time.ctime(t_p), delta)
            Log().write(LOG_VERBOSE, msg)
            if (delta > COMMIT_THRESHOLD
                or delta < (COMMIT_THRESHOLD * -1)):
              # Named 'warning_fmt' rather than 'str' so that the
              # builtin is not shadowed.
              warning_fmt = "%s: Significant timestamp change for '%s' (%d seconds)"
              Log().write(LOG_WARN, warning_fmt % (warning_prefix,
                                                   self.rel_name,
                                                   delta))
            current = prev
            prev = self.prev_rev[current]
            if not prev:
              break
            t_c = t_c - 1               # self.rev_data[current][0]
            t_p = self.rev_data[prev][0]

          # break from the for-loop
          break
      else:
        # finished the for-loop (no resyncing was performed)
        return

  def set_revision_info(self, revision, log, text):
    """Write out the CVSRevision for REVISION of the current file.

    LOG is the log message and TEXT the deltatext of REVISION.  The
    revision is appended to the revs file, its (author, log) pair is
    stored in the metadata db under a digest of the two, and a resync
    line is written when tree_completed() changed its timestamp."""
    timestamp, author, old_ts = self.rev_data[revision]
    digest = sha.new(log + '\0' + author).hexdigest()
    if old_ts:
      # the timestamp on this revision was changed. log it for later
      # resynchronization of other files's revisions that occurred
      # for this time and log message.
      self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

    # "...Give back one kadam to honor the Hebrew God whose Ark this is."
    #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
    #
    # If revision 1.1 appears to have been created via 'cvs add'
    # instead of 'cvs import', then this file probably never had a
    # default branch, so retroactively remove its record in the
    # default branches db.  The test is that the log message CVS uses
    # for 1.1 in imports is "Initial revision\n" with no period.
    if revision == '1.1' and log != 'Initial revision\n':
      if self.default_branches_db.has_key(self.rel_name):
        del self.default_branches_db[self.rel_name]

    # Look the previous revision up once.  It is None when REVISION
    # has no recorded predecessor; every use below shares this value,
    # so a missing entry is handled the same way everywhere.
    prev_rev = self.prev_rev.get(revision, None)
    prev_timestamp, ign, ign = self.rev_data.get(prev_rev, [0, None, None])

    # How to tell if a CVSRevision is an add, a change, or a deletion:
    #
    # It's a delete if RCS state is 'dead'
    #
    # It's an add if RCS state is 'Exp.' and
    #      - we either have no previous revision
    #        or
    #      - we have a previous revision whose state is 'dead'
    #
    # Anything else is a change.
    if self.rev_state[revision] == 'dead':
      op = OP_DELETE
    elif prev_rev is None or self.rev_state[prev_rev] == 'dead':
      op = OP_ADD
    else:
      op = OP_CHANGE

    if text:
      deltatext_code = DELTATEXT_NONEMPTY
    else:
      deltatext_code = DELTATEXT_EMPTY

    c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp, op,
                        prev_rev, revision,
                        self.next_rev.get(revision),
                        self.file_in_attic, self.file_executable,
                        self.file_size,
                        deltatext_code, self.fname,
                        self.mode, self.rev_to_branch_name(revision),
                        self.taglist.get(revision, []),
                        self.branchlist.get(revision, []))
    self.revs.write(str(c_rev) + "\n")
    StatsKeeper().record_c_rev(c_rev)

    if not self.metadata_db.has_key(digest):
      self.metadata_db[digest] = (author, log)

  def parse_completed(self):
    """Register the symbols of the current file as blockers of their
    parent branches and count the file."""
    # Walk through all branches and tags and register them with
    # their parent branch in the symbol database.
    symbol_lists = list(self.taglist.items()) + list(self.branchlist.items())
    for revision, symbols in symbol_lists:
      # The parent branch depends only on REVISION, so it is looked up
      # once per revision.  Symbols on trunk have no parent branch.
      name = self.rev_to_branch_name(revision)
      if name is None:
        continue
      for symbol in symbols:
        self.symbol_db.register_branch_blocker(name, symbol)

    self.num_files = self.num_files + 1

  def write_symbol_db(self):
    """Store the accumulated symbol database to disk."""
    self.symbol_db.write()
class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and
  closings.

  This data will later be used to determine valid SVNRevision ranges
  from which a file can be copied when creating a branch or tag in
  Subversion.  Do this by finding "Openings" and "Closings" for each
  file copied onto a branch or tag.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  obviously sprouts from revision 1.2.  Therefore, 1.2 is the opening
  for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
  'foo.c'.  Note that there may be many revisions chronologically
  between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
  perhaps even including on branch BEE itself.  But 1.3 is the next
  revision *on the same line* as 1.2, that is why it is the closing
  revision for those symbolic names of which 1.2 is the opening.

  The reason for doing all this hullabaloo is to make branch and tag
  creation as efficient as possible by minimizing the number of copies
  and deletes per creation.  For example, revisions 1.2 and 1.3 of
  foo.c might correspond to revisions 17 and 30 in Subversion.  That
  means that when creating branch BEE, there is some motivation to do
  the copy from one of 17-30.  Now if there were another file,
  'bar.c', whose opening and closing CVSRevisions for BEE corresponded
  to revisions 24 and 39 in Subversion, we would know that the ideal
  thing would be to copy the branch from somewhere between 24 and 29,
  inclusive.
  """

  def __init__(self):
    """Create the symbolings file and the temporary closings file."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
    self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
    Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

    # The keys of this dictionary are Subversion repository *source*
    # paths for which we've encountered an 'opening'.  The values are
    # the symbolic names that this path has opened.  The only paths
    # that should be in this dict are paths whose corresponding
    # CVSRevision is a default branch revision.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log any openings found in C_REV, and if C_REV.next_rev is not
    None, a closing.  The opening uses SVN_REVNUM, but the closing (if
    any) will have its revnum determined later."""
    has_closing = c_rev.next_rev is not None
    for raw_name in c_rev.tags + c_rev.branches:
      name = _clean_symbolic_name(raw_name)
      self._note_default_branch_opening(c_rev, name)
      if c_rev.op != OP_DELETE:
        self._log(name, svn_revnum, c_rev.svn_path, OPENING)

      # If our c_rev has a next_rev, then that's the closing rev for
      # this source revision.  Log it to closings for later processing
      # since we don't know the svn_revnum yet.
      if has_closing:
        closing_key = c_rev.unique_key(c_rev.next_rev)
        self.closings.write('%s %s\n' % (name, closing_key))

  def _log(self, name, svn_revnum, svn_path, type):
    """Write out a single line to the symbol_openings_closings file
    representing that svn_revnum of svn_path is either the opening or
    closing (TYPE) of NAME (a symbolic name).

    TYPE should only be one of the following global constants:
    OPENING or CLOSING."""
    # The revnum is zero-padded to 8 digits, which leaves room for
    # 99,999,999 SVN revs.  That *should* be enough.
    line = '%s %.8d %s %s\n' % (name, svn_revnum, type, svn_path)
    self.symbolings.write(line)

  def close(self):
    """Iterate through the closings file, lookup the svn_revnum for
    each closing CVSRevision, and write a proper line out to the
    symbolings file."""
    # Use this to get the c_rev.svn_path of our rev_key
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    self.closings.close()
    for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      # Each line is 'NAME REV_KEY'; only the first space separates.
      (name, rev_key) = line.rstrip().split(" ", 1)
      svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

      c_rev = cvs_revs_db.get_revision(rev_key)
      self._log(name, svn_revnum, c_rev.svn_path, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """Record SYMBOLIC_NAME as opened on C_REV.svn_trunk_path.

    NOTE(review): this was documented as acting only "if C_REV is a
    default branch revision", but no such check is made here; every
    call records the name.  Confirm which is intended."""
    opened = self.open_paths_with_default_branches
    opened.setdefault(c_rev.svn_trunk_path, [ ]).append(symbolic_name)

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.svn_trunk_path, then log each name in
    self.open_paths_with_default_branches[C_REV.svn_trunk_path] as a
    closing with SVN_REVNUM as the closing revision number."""
    path = c_rev.svn_trunk_path
    names = self.open_paths_with_default_branches.get(path)
    if names is None:
      return
    # log each symbol as a closing
    for name in names:
      self._log(name, svn_revnum, path, CLOSING)
    # Remove them from the openings list as we're done with them.
    del self.open_paths_with_default_branches[path]
1599 class PersistenceManager:
1600 """The PersistenceManager allows us to effectively store SVNCommits
1601 to disk and retrieve them later using only their subversion revision
1602 number as the key. It also returns the subversion revision number
1603 for a given CVSRevision's unique key.
1605 All information pertinent to each SVNCommit is stored in a series of
1606 on-disk databases so that SVNCommits can be retrieved on-demand.
1608 MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
1609 In 'new' mode, PersistenceManager will initialize a new set of on-disk
1610 databases and be fully-featured.
1611 In 'read' mode, PersistenceManager will open existing on-disk databases
1612 and the set_* methods will be unavailable."""
def __init__(self, mode):
  """Open the on-disk databases in MODE.

  MODE must be DB_OPEN_NEW or DB_OPEN_READ; anything else raises
  RuntimeError.  The metadata db and the CVSRevision db are always
  opened for reading.  The tags db is only opened when the
  conversion is not trunk-only."""
  self.mode = mode
  if mode != DB_OPEN_NEW and mode != DB_OPEN_READ:
    raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

  def open_until_pass8(basename, mode):
    # Open the database stored in temp file BASENAME and register
    # that file for cleanup with pass8.
    db = Database(temp(basename), mode)
    Cleanup().register(temp(basename), pass8)
    return db

  self.svn2cvs_db = open_until_pass8(SVN_REVNUMS_TO_CVS_REVS, mode)
  self.cvs2svn_db = open_until_pass8(CVS_REVS_TO_SVN_REVNUMS, mode)
  self.svn_commit_names_dates = open_until_pass8(SVN_COMMIT_NAMES_DATES,
                                                 mode)
  self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
  self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
  ###PERF kff Elsewhere there are comments about sucking the tags db
  ### into memory.  That seems like a good idea.
  if not Ctx().trunk_only:
    self.tags_db = TagsDatabase(DB_OPEN_READ)
    self.motivating_revnums = open_until_pass8(MOTIVATING_REVNUMS, mode)

  # "branch_name" -> svn_revnum in which branch was last filled.
  # This is used by CVSCommit._pre_commit, to prevent creating a fill
  # revision which would have nothing to do.
  self.last_filled = {}
1637 def get_svn_revnum(self, cvs_rev_unique_key):
1638 """Return the Subversion revision number in which
1639 CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
1640 is no mapping for CVS_REV_UNIQUE_KEY."""
1641 return int(self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM))
1643 def get_svn_commit(self, svn_revnum):
1644 """Return an SVNCommit that corresponds to SVN_REVNUM.
1646 If no SVNCommit exists for revnum SVN_REVNUM, then return None.
1648 This method can throw SVNCommitInternalInconsistencyError.
1650 svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
1651 c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
1652 if c_rev_keys == None:
1653 return None
1655 digest = None
1656 for key in c_rev_keys:
1657 c_rev = self.cvs_revisions.get_revision(key)
1658 svn_commit.add_revision(c_rev)
1659 # Set the author and log message for this commit by using
1660 # CVSRevision metadata, but only if haven't done so already.
1661 if digest is None:
1662 digest = c_rev.digest
1663 author, log_msg = self.svn_commit_metadata[digest]
1664 svn_commit.set_author(author)
1665 svn_commit.set_log_msg(log_msg)
1667 # If we're doing a trunk-only conversion, we don't need to do any more work.
1668 if Ctx().trunk_only:
1669 return svn_commit
1671 name, date = self._get_name_and_date(svn_revnum)
1672 if name:
1673 svn_commit.set_symbolic_name(name)
1674 svn_commit.set_date(date)
1675 if self.tags_db.has_key(name):
1676 svn_commit.is_tag = 1
1678 motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
1679 if motivating_revnum:
1680 svn_commit.set_motivating_revnum(int(motivating_revnum))
1681 svn_commit.set_date(date)
1683 if len(svn_commit.cvs_revs) and name:
1684 msg = """An SVNCommit cannot have cvs_revisions *and* a
1685 corresponding symbolic name ('%s') to fill.""" % name
1686 raise SVNCommit.SVNCommitInternalInconsistencyError(msg)
1688 return svn_commit
1690 def set_cvs_revs(self, svn_revnum, cvs_revs):
1691 """Record the bidirectional mapping between SVN_REVNUM and
1692 CVS_REVS."""
1693 if self.mode == DB_OPEN_READ:
1694 raise RuntimeError, \
1695 'Write operation attempted on read-only PersistenceManager'
1696 for c_rev in cvs_revs:
1697 Log().write(LOG_VERBOSE, " ", c_rev.unique_key())
1698 self.svn2cvs_db[str(svn_revnum)] = [x.unique_key() for x in cvs_revs]
1699 for c_rev in cvs_revs:
1700 self.cvs2svn_db[c_rev.unique_key()] = svn_revnum
1702 def set_name_and_date(self, svn_revnum, name, date):
1703 """Associate symbolic name NAME and DATE with SVN_REVNUM."""
1704 if self.mode == DB_OPEN_READ:
1705 raise RuntimeError, \
1706 'Write operation attempted on read-only PersistenceManager'
1707 self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
1708 self.last_filled[name] = svn_revnum
1710 def _get_name_and_date(self, svn_revnum):
1711 """Return a tuple containing the symbolic name and date associated
1712 with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
1713 associated with it."""
1714 return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))
1716 def set_motivating_revnum(self, svn_revnum, motivating_revnum):
1717 """Store MOTIVATING_REVNUM as the value of SVN_REVNUM"""
1718 if self.mode == DB_OPEN_READ:
1719 raise RuntimeError, \
1720 'Write operation attempted on read-only PersistenceManager'
1721 self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
1724 class CVSCommit:
1725 """Each instance of this class contains a number of CVS Revisions
1726 that correspond to one or more Subversion Commits. After all CVS
1727 Revisions are added to the grouping, calling process_revisions will
1728 generate a Subversion Commit (or Commits) for the set of CVS
1729 Revisions in the grouping."""
1731 def __init__(self, digest, author, log):
1732 self.digest = digest
1733 self.author = author
1734 self.log = log
1736 # Symbolic names for which the last source revision has already
1737 # been seen and for which the CVSRevisionAggregator has already
1738 # generated a fill SVNCommit. See self.process_revisions().
1739 self.done_symbols = [ ]
1741 self.files = { }
1742 # Lists of CVSRevisions
1743 self.changes = [ ]
1744 self.deletes = [ ]
1746 # Start out with a t_min higher than any incoming time T, and a
1747 # t_max lower than any incoming T. This way the first T will
1748 # push t_min down to T, and t_max up to T, naturally (without any
1749 # special-casing), and successive times will then ratchet them
1750 # outward as appropriate.
1751 self.t_min = 1L<<32
1752 self.t_max = 0
1754 # This will be set to the SVNCommit that occurs in self._commit.
1755 self.motivating_commit = None
1757 # This is a list of all non-primary commits motivated by the main
1758 # commit. We gather these so that we can set their dates to the
1759 # same date as the primary commit.
1760 self.secondary_commits = [ ]
1762 # State for handling default branches.
1764 # Here is a tempting, but ultimately nugatory, bit of logic, which
1765 # I share with you so you may appreciate the less attractive, but
1766 # refreshingly non-nugatory, logic which follows it:
1768 # If some of the commits in this txn happened on a non-trunk
1769 # default branch, then those files will have to be copied into
1770 # trunk manually after being changed on the branch (because the
1771 # RCS "default branch" appears as head, i.e., trunk, in practice).
1772 # As long as those copies don't overwrite any trunk paths that
1773 # were also changed in this commit, then we can do the copies in
1774 # the same revision, because they won't cover changes that don't
1775 # appear anywhere/anywhen else. However, if some of the trunk dst
1776 # paths *did* change in this commit, then immediately copying the
1777 # branch changes would lose those trunk mods forever. So in this
1778 # case, we need to do at least that copy in its own revision. And
1779 # for simplicity's sake, if we're creating the new revision for
1780 # even one file, then we just do all such copies together in the
1781 # new revision.
1783 # Doesn't that sound nice?
1785 # Unfortunately, Subversion doesn't support copies with sources
1786 # in the current txn. All copies must be based in committed
1787 # revisions. Therefore, we generate the above-described new
1788 # revision unconditionally.
1790 # This is a list of c_revs, and a c_rev is appended for each
1791 # default branch commit that will need to be copied to trunk (or
1792 # deleted from trunk) in some generated revision following the
1793 # "regular" revision.
1794 self.default_branch_cvs_revisions = [ ]
1796 def __cmp__(self, other):
1797 # Commits should be sorted by t_max. If both self and other have
1798 # the same t_max, break the tie using t_min, and lastly, digest
1799 return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
1800 or cmp(self.digest, other.digest))
1802 def has_file(self, fname):
1803 return self.files.has_key(fname)
1805 def revisions(self):
1806 return self.changes + self.deletes
1808 def opens_symbolic_name(self, name):
1809 """Returns true if any CVSRevision in this commit is on a tag or a
1810 branch or is the origin of a tag or branch."""
1811 for c_rev in self.revisions():
1812 if c_rev.opens_symbolic_name(name):
1813 return 1
1814 return 0
1816 def add_revision(self, c_rev):
1817 # Record the time range of this commit.
1819 # ### ISSUE: It's possible, though unlikely, that the time range
1820 # of a commit could get gradually expanded to be arbitrarily
1821 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
1822 # problem, and anyway deciding where to break it up would be a
1823 # judgement call. For now, we just print a warning in commit() if
1824 # this happens.
1825 if c_rev.timestamp < self.t_min:
1826 self.t_min = c_rev.timestamp
1827 if c_rev.timestamp > self.t_max:
1828 self.t_max = c_rev.timestamp
1830 if c_rev.op == OP_DELETE:
1831 self.deletes.append(c_rev)
1832 else:
1833 # OP_CHANGE or OP_ADD
1834 self.changes.append(c_rev)
1836 self.files[c_rev.fname] = 1
1838 def _pre_commit(self):
1839 """Generates any SVNCommits that must exist before the main
1840 commit."""
1842 # There may be multiple c_revs in this commit that would cause
1843 # branch B to be filled, but we only want to fill B once. On the
1844 # other hand, there might be multiple branches committed on in
1845 # this commit. Whatever the case, we should count exactly one
1846 # commit per branch, because we only fill a branch once per
1847 # CVSCommit. This list tracks which branches we've already
1848 # counted.
1849 accounted_for_sym_names = [ ]
1851 def fill_needed(c_rev, pm):
1852 """Return 1 if this is the first commit on a new branch (for
1853 this file) and we need to fill the branch; else return 0
1854 (meaning that some other file's first commit on the branch has
1855 already done the fill for us).
1857 If C_REV.op is OP_ADD, only return 1 if the branch that this
1858 commit is on has no last filled revision.
1860 PM is a PersistenceManager to query.
1863 # Different '.' counts indicate that c_rev is now on a different
1864 # line of development (and may need a fill)
1865 if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
1866 svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
1867 # It should be the case that when we have a file F that
1868 # is added on branch B (thus, F on trunk is in state
1869 # 'dead'), we generate an SVNCommit to fill B iff the branch
1870 # has never been filled before.
1872 # If this c_rev.op == OP_ADD, *and* the branch has never
1873 # been filled before, then fill it now. Otherwise, no need to
1874 # fill it.
1875 if c_rev.op == OP_ADD:
1876 if pm.last_filled.get(c_rev.branch_name, None) is None:
1877 return 1
1878 else:
1879 if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
1880 return 1
1881 return 0
1883 for c_rev in self.changes + self.deletes:
1884 # If a commit is on a branch, we must ensure that the branch
1885 # path being committed exists (in HEAD of the Subversion
1886 # repository). If it doesn't exist, we will need to fill the
1887 # branch. After the fill, the path on which we're committing
1888 # will exist.
1889 if c_rev.branch_name \
1890 and c_rev.branch_name not in accounted_for_sym_names \
1891 and c_rev.branch_name not in self.done_symbols \
1892 and fill_needed(c_rev, Ctx()._persistence_manager):
1893 svn_commit = SVNCommit("pre-commit symbolic name '%s'"
1894 % c_rev.branch_name)
1895 svn_commit.set_symbolic_name(c_rev.branch_name)
1896 self.secondary_commits.append(svn_commit)
1897 accounted_for_sym_names.append(c_rev.branch_name)
1899 def _commit(self):
1900 """Generates the primary SVNCommit that corresponds the this
1901 CVSCommit."""
1902 # Generate an SVNCommit unconditionally. Even if the only change
1903 # in this CVSCommit is a deletion of an already-deleted file (that
1904 # is, a CVS revision in state 'dead' whose predecessor was also in
1905 # state 'dead'), the conversion will still generate a Subversion
1906 # revision containing the log message for the second dead
1907 # revision, because we don't want to lose that information.
1908 svn_commit = SVNCommit("commit")
1909 self.motivating_commit = svn_commit
1911 for c_rev in self.changes:
1912 svn_commit.add_revision(c_rev)
1913 # Only make a change if we need to. When 1.1.1.1 has an empty
1914 # deltatext, the explanation is almost always that we're looking
1915 # at an imported file whose 1.1 and 1.1.1.1 are identical. On
1916 # such imports, CVS creates an RCS file where 1.1 has the
1917 # content, and 1.1.1.1 has an empty deltatext, i.e, the same
1918 # content as 1.1. There's no reason to reflect this non-change
1919 # in the repository, so we want to do nothing in this case. (If
1920 # we were really paranoid, we could make sure 1.1's log message
1921 # is the CVS-generated "Initial revision\n", but I think the
1922 # conditions below are strict enough.)
1923 if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
1924 and (c_rev.rev == "1.1.1.1")):
1925 if c_rev.is_default_branch_revision():
1926 self.default_branch_cvs_revisions.append(c_rev)
1928 for c_rev in self.deletes:
1929 # When a file is added on a branch, CVS not only adds the file
1930 # on the branch, but generates a trunk revision (typically
1931 # 1.1) for that file in state 'dead'. We only want to add
1932 # this revision if the log message is not the standard cvs
1933 # fabricated log message.
1934 if c_rev.prev_rev is None:
1935 # c_rev.branches may be empty if the originating branch
1936 # has been excluded.
1937 if not c_rev.branches:
1938 continue
1939 cvs_generated_msg = ('file %s was initially added on branch %s.\n'
1940 % (c_rev.filename(),
1941 c_rev.branches[0]))
1942 author, log_msg = \
1943 Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
1944 if log_msg == cvs_generated_msg:
1945 continue
1947 svn_commit.add_revision(c_rev)
1948 if c_rev.is_default_branch_revision():
1949 self.default_branch_cvs_revisions.append(c_rev)
1951 # There is a slight chance that we didn't actually register any
1952 # CVSRevisions with our SVNCommit (see loop over self.deletes
1953 # above), so if we have no CVSRevisions, we don't flush the
1954 # svn_commit to disk and roll back our revnum.
1955 if len(svn_commit.cvs_revs) > 0:
1956 svn_commit.flush()
1957 else:
1958 # We will not be flushing this SVNCommit, so rollback the
1959 # SVNCommit revision counter.
1960 SVNCommit.revnum = SVNCommit.revnum - 1
1962 if not Ctx().trunk_only:
1963 for c_rev in self.revisions():
1964 Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
1966 def _post_commit(self):
1967 """Generates any SVNCommits that we can perform now that _commit
1968 has happened. That is, handle non-trunk default branches.
1969 Sometimes an RCS file has a non-trunk default branch, so a commit
1970 on that default branch would be visible in a default CVS checkout
1971 of HEAD. If we don't copy that commit over to Subversion's trunk,
1972 then there will be no Subversion tree which corresponds to that
1973 CVS checkout. Of course, in order to copy the path over, we may
1974 first need to delete the existing trunk there. """
1976 # Only generate a commit if we have default branch revs
1977 if len(self.default_branch_cvs_revisions):
1978 # Generate an SVNCommit for all of our default branch c_revs.
1979 svn_commit = SVNCommit("post-commit default branch(es)")
1980 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
1981 for c_rev in self.default_branch_cvs_revisions:
1982 svn_commit.add_revision(c_rev)
1983 Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
1984 svn_commit.revnum)
1985 self.secondary_commits.append(svn_commit)
1987 def process_revisions(self, done_symbols):
1988 """Process all the CVSRevisions that this instance has, creating
1989 one or more SVNCommits in the process. Generate fill SVNCommits
1990 only for symbols not in DONE_SYMBOLS (avoids unnecessary
1991 fills).
1993 Return the primary SVNCommit that corresponds to this CVSCommit.
1994 The returned SVNCommit is the commit that motivated any other
1995 SVNCommits generated in this CVSCommit."""
1996 self.done_symbols = done_symbols
1997 seconds = self.t_max - self.t_min + 1
1999 Log().write(LOG_VERBOSE, '-' * 60)
2000 Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
2001 if seconds == 1:
2002 Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
2003 % time.ctime(self.t_max))
2004 else:
2005 Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
2006 Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
2007 % (time.ctime(self.t_max), seconds))
2009 if seconds > COMMIT_THRESHOLD + 1:
2010 Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
2011 % (warning_prefix, COMMIT_THRESHOLD))
2013 if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
2014 self._commit()
2015 return self.motivating_commit
2017 self._pre_commit()
2018 self._commit()
2019 self._post_commit()
2021 for svn_commit in self.secondary_commits:
2022 svn_commit.set_date(self.motivating_commit.get_date())
2023 svn_commit.flush()
2025 return self.motivating_commit
class SVNCommit:
  """One commit to the Subversion Repository.  An SVNCommit is one of
  three types:

  1. Commits one or more CVSRevisions (cannot fill a symbolic name).

  2. Creates or fills a symbolic name (cannot commit CVSRevisions).

  3. Updates trunk to reflect the contents of a particular branch
     (this is to handle RCS default branches)."""

  # The revision number to assign to the next new SVNCommit.
  # We start at 2 because SVNRepositoryMirror uses the first commit
  # to create trunk, tags, and branches.
  revnum = 2

  class SVNCommitInternalInconsistencyError(Exception):
    """Exception raised if we encounter an impossible state in the
    SVNCommit Databases."""
    pass

  def __init__(self, description="", revnum=None, cvs_revs=None):
    """Instantiate an SVNCommit.  DESCRIPTION is for debugging only.
    If REVNUM, the SVNCommit will correspond to that revision number;
    and if CVS_REVS, then they must be the exact set of CVSRevisions for
    REVNUM.

    It is an error to pass CVS_REVS without REVNUM, but you may pass
    REVNUM without CVS_REVS, and then add a revision at a time by
    invoking add_revision()."""
    self._description = description

    # Revprop metadata for this commit.
    #
    # The initial values are placeholders; at least the log and the
    # date should be different by the time these are used.
    #
    # They are private because they should be handed out encoded in
    # UTF8 while callers aren't required to set them in UTF8.  They
    # are set through accessor methods and read, in dictionary form,
    # through self.get_revprops().
    self._author = Ctx().username
    self._log_msg = "This log message means an SVNCommit was used too soon."
    self._max_date = 0  # Latest date seen so far.

    self.cvs_revs = cvs_revs or []
    if not revnum:
      # No revision number supplied: take the next one from the
      # class-wide counter and advance the counter.
      self.revnum = SVNCommit.revnum
      SVNCommit.revnum = SVNCommit.revnum + 1
    else:
      self.revnum = revnum

    # The symbolic name that is filled in this SVNCommit, if any
    self.symbolic_name = None

    # If this commit is a default branch synchronization, this is the
    # subversion revision number of the *primary* commit where the
    # default branch changes actually happened.  It is None otherwise.
    #
    # Several synchronization commits may refer to the same motivating
    # commit revision number, and a single synchronization commit may
    # contain CVSRevisions on multiple different default branches.
    self.motivating_revnum = None

    # is_tag is true only if this commit is a fill of a symbolic name
    # that is a tag, None in all other cases.
    self.is_tag = None

  def set_symbolic_name(self, name):
    "Set self.symbolic_name to NAME, cleaned by _clean_symbolic_name()."
    self.symbolic_name = _clean_symbolic_name(name)

  def set_motivating_revnum(self, revnum):
    "Set self.motivating_revnum to REVNUM."
    self.motivating_revnum = revnum

  def set_author(self, author):
    """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
    This is the only way to set an SVNCommit's author."""
    self._author = author

  def set_log_msg(self, msg):
    """Set this SVNCommit's log message to MSG (a locally-encoded string).
    This is the only way to set an SVNCommit's log message."""
    self._log_msg = msg

  def set_date(self, date):
    """Set this SVNCommit's date to DATE (an integer).
    self.add_revision() also raises the date to the timestamp of any
    later CVSRevision it is given, so calling this may be unnecessary,
    and a value set here may be overwritten by a subsequent
    self.add_revision()."""
    self._max_date = date

  def get_date(self):
    """Returns this SVNCommit's date as an integer."""
    return self._max_date

  def get_revprops(self):
    """Return the Subversion revprops for this SVNCommit as a
    dictionary with keys 'svn:author', 'svn:log' and 'svn:date'.  If
    the author or log cannot be converted, warn and return them
    unconverted."""
    date = format_date(self._max_date)
    try:
      ### FIXME: The 'replace' behavior should be an option, like
      ### --encoding is.
      utf8_author = None
      if self._author is not None:
        utf8_author = unicode(self._author, Ctx().encoding,
                              'replace').encode('utf8')
      utf8_log = unicode(self.get_log_msg(), Ctx().encoding,
                         'replace').encode('utf8')
      return { 'svn:author' : utf8_author,
               'svn:log'    : utf8_log,
               'svn:date'   : date }
    except UnicodeError:
      Log().write(LOG_WARN, '%s: problem encoding author or log message:'
                  % warning_prefix)
      Log().write(LOG_WARN, " author: '%s'" % self._author)
      Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
      Log().write(LOG_WARN, " date: '%s'" % date)
      Log().write(LOG_WARN, "(subversion rev %s) Related files:" % self.revnum)
      for c_rev in self.cvs_revs:
        Log().write(LOG_WARN, " ", c_rev.fname)

      Log().write(LOG_WARN, "Consider rerunning with (for example)",
                  "'--encoding=latin1'.\n")
      # It's better to fall back to the original (unknown encoding) data
      # than to either 1) quit or 2) record nothing at all.
      return { 'svn:author' : self._author,
               'svn:log'    : self.get_log_msg(),
               'svn:date'   : date }

  def add_revision(self, cvs_rev):
    """Append CVS_REV to self.cvs_revs and move this commit's date
    forward to CVS_REV's timestamp if that is later."""
    self.cvs_revs.append(cvs_rev)
    if self._max_date < cvs_rev.timestamp:
      self._max_date = cvs_rev.timestamp

  def _is_primary_commit(self):
    """Return true if this is a primary SVNCommit, false otherwise."""
    return not self.symbolic_name and not self.motivating_revnum

  def flush(self):
    """Log this commit and record it with the persistence manager."""
    Log().write(LOG_NORMAL, "Creating Subversion commit %d (%s)"
                % (self.revnum, self._description))
    Ctx()._persistence_manager.set_cvs_revs(self.revnum, self.cvs_revs)

    if self.motivating_revnum is not None:
      Ctx()._persistence_manager.set_motivating_revnum(self.revnum,
                                                       self.motivating_revnum)

    # Only non-primary commits store their date and/or their
    # symbolic_name.
    if not self._is_primary_commit():
      Ctx()._persistence_manager.set_name_and_date(self.revnum,
                                                   self.symbolic_name,
                                                   self._max_date)

  def __str__(self):
    """ Print a human-readable description of this SVNCommit.  This
    description is not intended to be machine-parseable (although
    we're not going to stop you if you try!)"""

    pieces = ["SVNCommit #: " + str(self.revnum) + "\n"]
    if self.symbolic_name:
      pieces.append(" symbolic name: " + self.symbolic_name + "\n")
    else:
      pieces.append(" NO symbolic name\n")
    pieces.append(" debug description: " + self._description + "\n")
    pieces.append(" cvs_revs:\n")
    for c_rev in self.cvs_revs:
      pieces.append(" " + c_rev.unique_key() + "\n")
    return "".join(pieces)

  def get_log_msg(self):
    """Returns the actual log message for a primary commit, and the
    appropriate manufactured log message for a secondary commit."""
    if self.symbolic_name is not None:
      return self._log_msg_for_symbolic_name_commit()
    if self.motivating_revnum is not None:
      return self._log_msg_for_default_branch_commit()
    return self._log_msg

  def _log_msg_for_symbolic_name_commit(self):
    """Creates a log message for a manufactured commit that fills
    self.symbolic_name.  If self.is_tag is true, write the log message
    as though for a tag, else write it as though for a branch."""
    if self.is_tag:
      kind = 'tag'
    else:
      kind = 'branch'

    # In Python 2.2.3, we could use textwrap.fill().  Oh well :-).
    if len(self.symbolic_name) < 13:
      separator = ' '
    else:
      separator = '\n'

    return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
           % (kind, separator, self.symbolic_name)

  def _log_msg_for_default_branch_commit(self):
    """Creates a log message for a manufactured commit that
    synchronizes a non-trunk default branch with trunk."""
    return ('This commit was generated by cvs2svn to compensate for '
            'changes in r%d,\n'
            'which included commits to RCS files with non-trunk default '
            'branches.\n' % self.motivating_revnum)
class CVSRevisionAggregator:
  """This class groups CVSRevisions into CVSCommits that represent
  at least one SVNCommit."""

  def __init__(self):
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), DB_OPEN_READ)

    # Pending CVSCommits, keyed by digest (or by a digest with '-'
    # appended once the commit has been closed to further changes).
    self.cvs_commits = {}

    # Symbols (used as a set) waiting for a closing SVNCommit.
    self.pending_symbols = {}

    # A list of symbols for which we've already encountered the last
    # CVSRevision that is a source for that symbol.  That is, the
    # final fill for this symbol has been done, and we never need to
    # fill it again.
    self.done_symbols = [ ]

    # This variable holds the most recently created primary svn_commit
    # object.  CVSRevisionAggregator maintains this variable merely
    # for its date, so that it can set dates for the SVNCommits
    # created in self.attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = Database(temp(DEFAULT_BRANCHES_DB),
                                          DB_OPEN_READ)

  def process_revision(self, c_rev):
    """Add C_REV to the pending CVSCommit with the same digest (creating
    one if necessary), after first processing every pending CVSCommit
    that C_REV is too late to join."""
    # Each time we read a new line, we scan the commits we've
    # accumulated so far to see if any are ready for processing now.
    # The scan works on a snapshot because it edits self.cvs_commits.
    ready_queue = [ ]
    for digest_key, cvs_commit in list(self.cvs_commits.items()):
      if cvs_commit.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
        ready_queue.append(cvs_commit)
        del self.cvs_commits[digest_key]
        continue
      # If the inbound commit is on the same file as a pending commit,
      # close the pending commit to further changes.  Don't flush it though,
      # as there may be other pending commits dated before this one.
      # ### ISSUE: the has_file() check below is not optimal.
      # It does fix the dataloss bug where revisions would get lost
      # if checked in too quickly, but it can also break apart the
      # commits.  The correct fix would require tracking the dependencies
      # between change sets and committing them in proper order.
      if cvs_commit.has_file(c_rev.fname):
        unused_id = digest_key + '-'
        # Find a string that is not already a key in
        # the self.cvs_commits dict
        while self.cvs_commits.has_key(unused_id):
          unused_id = unused_id + '-'
        self.cvs_commits[unused_id] = cvs_commit
        del self.cvs_commits[digest_key]

    # Add this item into the set of still-available commits.
    if self.cvs_commits.has_key(c_rev.digest):
      cvs_commit = self.cvs_commits[c_rev.digest]
    else:
      author, log = self.metadata_db[c_rev.digest]
      cvs_commit = CVSCommit(c_rev.digest, author, log)
      self.cvs_commits[c_rev.digest] = cvs_commit
    cvs_commit.add_revision(c_rev)

    # If there are any elements in the ready_queue at this point, they
    # need to be processed, because this latest rev couldn't possibly
    # be part of any of them.  Sort them into time-order, then process
    # 'em.
    ready_queue.sort()

    # Make sure we attempt_to_commit_symbols for this c_rev, even if no
    # commits are ready.
    if not ready_queue:
      self.attempt_to_commit_symbols(ready_queue, c_rev)

    # Consume the queue from the front; after each commit only the
    # commits that are still waiting are passed on.  The head is
    # dropped by position, not by an equality search.
    while ready_queue:
      cvs_commit = ready_queue[0]
      self.latest_primary_svn_commit \
          = cvs_commit.process_revisions(self.done_symbols)
      del ready_queue[0]
      self.attempt_to_commit_symbols(ready_queue, c_rev)

  def flush(self):
    """Commit anything left in self.cvs_commits.  Then inform the
    SymbolingsLogger that all commits are done."""

    ready_queue = [ ]
    for digest_key, cvs_commit in list(self.cvs_commits.items()):
      ready_queue.append((cvs_commit, digest_key))

    ready_queue.sort()
    for cvs_commit, digest_key in ready_queue:
      self.latest_primary_svn_commit = \
          cvs_commit.process_revisions(self.done_symbols)
      # Commits not yet processed are still in self.cvs_commits, which
      # attempt_to_commit_symbols() consults, so no queue is passed.
      del self.cvs_commits[digest_key]
      self.attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
    """
    This function generates 1 SVNCommit for each symbol in
    self.pending_symbols that doesn't have an opening CVSRevision in
    either QUEUED_COMMITS or self.cvs_commits.values().

    If C_REV is not None, then we first add to self.pending_symbols
    any symbols from C_REV that C_REV is the last CVSRevision for.
    """
    # If we're not doing a trunk-only conversion, get the symbolic
    # names that this c_rev is the last *source* CVSRevision for and
    # add them to those left over from previous passes through the
    # aggregator.
    if c_rev and not Ctx().trunk_only:
      for sym in self.last_revs_db.get(c_rev.unique_key(), []):
        self.pending_symbols[sym] = None

    # Make a list of all symbols that still have *source* CVSRevisions
    # in the pending commit queue (self.cvs_commits).  The set of
    # commits to inspect is the same for every symbol, so build it
    # once.
    candidate_commits = list(self.cvs_commits.values()) + queued_commits
    open_symbols = {}
    for sym in list(self.pending_symbols.keys()):
      for cvs_commit in candidate_commits:
        if cvs_commit.opens_symbolic_name(sym):
          open_symbols[sym] = None
          break

    # Sort the pending symbols so that we will always process the
    # symbols in the same order, regardless of the order in which the
    # dict hashing algorithm hands them back to us.  We do this so
    # that our tests will get the same results on all platforms.
    sorted_pending_symbols_keys = list(self.pending_symbols.keys())
    sorted_pending_symbols_keys.sort()
    for sym in sorted_pending_symbols_keys:
      if open_symbols.has_key(sym): # sym is still open--don't close it.
        continue
      svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
      svn_commit.set_symbolic_name(sym)
      svn_commit.set_date(self.latest_primary_svn_commit.get_date())
      svn_commit.flush()
      self.done_symbols.append(sym)
      del self.pending_symbols[sym]
class SymbolingsReader:
  """Interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  Does the heavy lifting of finding and returning
  the correct opening and closing Subversion revision numbers for a
  given symbolic name."""

  def __init__(self):
    """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and
    reads the offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    # The offsets_db is really small, and we need to read and write
    # from it a fair bit, so suck it into memory
    on_disk_offsets = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    self.offsets = { }
    for symbol in on_disk_offsets.db.keys():
      self.offsets[symbol] = on_disk_offsets[symbol]

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Given SYMBOLIC_NAME and SVN_REVNUM, return a new
    SymbolicNameFillingGuide object.

    Note that if we encounter an opening rev in this fill, but the
    corresponding closing rev takes place later than SVN_REVNUM, the
    closing will not be passed to SymbolicNameFillingGuide in this
    fill (and will be discarded when encountered in a later fill).
    This is perfectly fine, because we can still do a valid fill
    without the closing--we always try to fill what we can as soon as
    we can."""
    # A symbol without a recorded offset gets an empty guide: it's
    # possible to have a branch start with a file that was added on a
    # branch.
    if not self.offsets.has_key(symbolic_name):
      return SymbolicNameFillingGuide(symbolic_name)

    # Position the symbolings file at this symbol's recorded offset.
    self.symbolings.seek(self.offsets[symbolic_name])

    symbol_fill = SymbolicNameFillingGuide(symbolic_name)
    while 1:
      line_start = self.symbolings.tell()
      line = self.symbolings.readline().rstrip()
      if not line:
        break
      entry_name, entry_revnum, entry_type, svn_path = line.split(" ", 3)
      entry_revnum = int(entry_revnum)
      # Stop at the first entry that is too new or that belongs to a
      # different symbol.
      if not (entry_revnum <= svn_revnum and entry_name == symbolic_name):
        break
      symbol_fill.register(svn_path, entry_revnum, entry_type)

    # If anything was registered, remember where the last line read
    # began as the new offset for this symbol.
    if not symbol_fill.is_empty():
      self.offsets[symbolic_name] = line_start

    symbol_fill.make_node_tree()
    return symbol_fill
class SymbolicNameFillingGuide:
  """A SymbolicNameFillingGuide is essentially a node tree
  representing the source paths to be copied to fill
  self.name in the current SVNCommit.

  After calling self.register() on a series of openings and closings,
  call self.make_node_tree() to prepare self.node_tree for
  examination.  See the docstring for self.make_node_tree() for
  details on the structure of self.node_tree.

  By walking self.node_tree and calling self.get_best_revnum() on each
  node, the caller can determine what subversion revision number to
  copy the path corresponding to that node from.  self.node_tree
  should be treated as read-only.

  The caller can then descend to sub-nodes to see if their "best
  revnum" differs from their parents' and if it does, take appropriate
  actions to "patch up" the subtrees."""

  def __init__(self, symbolic_name):
    """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
    prepares it for receiving openings and closings."""
    self.name = symbolic_name

    # Keys under which a leaf node stores its opening and closing
    # revision.  They start with '/' so that they can be told apart
    # from path components when a node's entries are walked.
    self.opening_key = "/o"
    self.closing_key = "/c"

    # A dictionary of SVN_PATHS and SVN_REVNUMS whose format is:
    #
    # { svn_path : { self.opening_key : svn_revnum,
    #                self.closing_key : svn_revnum }
    #   ...}
    self.things = { }

    # The key for the root node of the node tree
    self.root_key = '0'
    # The dictionary that holds our node tree, seeded with the root key.
    self.node_tree = { self.root_key : { } }

  def get_best_revnum(self, node, preferred_revnum):
    """Determine the best subversion revision number to use when
    copying the source tree beginning at NODE (a key into
    self.node_tree).

    PREFERRED_REVNUM is passed to self._best_rev and used to
    calculate the best revnum.

    Returns the tuple (REVNUM, MAX_SCORE).  If no revision can be
    found, an error naming this symbol is written to stderr and the
    program exits with status 1."""
    # Aggregate openings and closings from the rev tree
    openings = self._list_revnums_for_key(node, self.opening_key)
    closings = self._list_revnums_for_key(node, self.closing_key)

    # Score the lists
    scores = self._score_revisions(self._sum_revnum_counts(openings),
                                   self._sum_revnum_counts(closings))

    revnum, max_score = self._best_rev(scores, preferred_revnum)

    if revnum == SVN_INVALID_REVNUM:
      # Report the symbol being filled.  (This used to reference an
      # undefined name, which turned the fatal error into a NameError.)
      sys.stderr.write(error_prefix + ": failed to find a revision "
                       + "to copy from when copying %s\n" % self.name)
      sys.exit(1)
    return revnum, max_score

  def _best_rev(self, scores, preferred_rev):
    """Return the tuple (REV, MAX_SCORE) for the revision with the
    highest score from SCORES, a list returned by _score_revisions().
    When the maximum score is shared by multiple revisions, the first
    one in SCORES (the oldest) is selected, unless the score in effect
    at PREFERRED_REV equals the maximum, in which case PREFERRED_REV
    is selected.

    REV is SVN_INVALID_REVNUM when no entry of SCORES has a score
    greater than zero."""
    max_score = 0
    preferred_rev_score = -1
    rev = SVN_INVALID_REVNUM
    if preferred_rev is None:
      # Comparison order of different types is arbitrary.  Do not
      # expect None to compare less than int values below.
      # In Python 2.3 None compares with ints like negative infinity.
      # In Python 2.0 None compares with ints like positive infinity.
      preferred_rev = SVN_INVALID_REVNUM
    for revnum, count in scores:
      if count > max_score:
        max_score = count
        rev = revnum
      # The last entry at or before PREFERRED_REV carries the score
      # that is in effect at PREFERRED_REV.
      if revnum <= preferred_rev:
        preferred_rev_score = count
    if preferred_rev_score == max_score:
      rev = preferred_rev
    return rev, max_score

  def _score_revisions(self, openings, closings):
    """Return a list of revisions and scores based on OPENINGS and
    CLOSINGS.  The returned list looks like:

       [(REV1, SCORE1), (REV2, SCORE2), ...]

    where REV2 > REV1.  OPENINGS and CLOSINGS are lists of
    (revnum, count) tuples sorted by revnum, as produced by
    self._sum_revnum_counts(); CLOSINGS may also be None.

    Each score indicates that copying the corresponding revision (or
    any following revision up to the next revision in the list) of the
    object in question would yield that many correct paths at or
    underneath the object.  There may be other paths underneath it
    which are not correct and would need to be deleted or recopied;
    those can only be detected by descending and examining their
    scores.

    If OPENINGS is false, return the empty list."""
    # First look for easy outs.
    if not openings:
      return []

    # Must be able to iterate over closings below.
    if closings is None:
      closings = []

    # Every opening raises the running score from its revision onwards.
    scores = []
    opening_score_accum = 0
    for opening_rev, opening_score in openings:
      opening_score_accum = opening_score_accum + opening_score
      scores.append((opening_rev, opening_score_accum))

    # Every closing lowers the score from its revision onwards.  The
    # closings are visited in ascending order, so entries of SCORES
    # that lie before one closing can be skipped for the next one;
    # SEARCH_START is the index of the first entry still to examine.
    search_start = 0
    for closing_rev, closing_score in closings:
      done_exact_rev = None
      insert_index = None
      insert_score = None
      for j in range(search_start, len(scores)):
        score_rev, score = scores[j]
        if score_rev >= closing_rev:
          if not done_exact_rev:
            if score_rev > closing_rev:
              # CLOSING_REV has no entry of its own; one has to be
              # inserted in front of entry J.
              # NOTE(review): for j == 0 the index j-1 wraps around to
              # the last entry.  This presumably cannot happen because
              # a closing follows its opening -- confirm.
              insert_index = j
              insert_score = scores[j-1][1] - closing_score
            done_exact_rev = 1
          scores[j] = (score_rev, score - closing_score)
        else:
          search_start = j + 1
      if not done_exact_rev:
        # The closing is younger than every entry so far.
        scores.append((closing_rev, scores[-1][1] - closing_score))
      if insert_index is not None:
        scores.insert(insert_index, (closing_rev, insert_score))
    return scores

  def _sum_revnum_counts(self, rev_list):
    """Takes an array of revisions (REV_LIST), for example:

      [21, 18, 6, 49, 39, 24, 24, 24, 24, 24, 24, 24]

    and adds up every occurrence of each revision and returns a sorted
    array of tuples containing (svn_revnum, count):

      [(6, 1), (18, 1), (21, 1), (24, 7), (39, 1), (49, 1)]
    """
    counts = {}
    for revnum in rev_list:
      counts[revnum] = counts.get(revnum, 0) + 1
    summed = list(counts.items())
    summed.sort()
    return summed

  def _list_revnums_for_key(self, node, revnum_type_key):
    """Scan self.node_tree and return a list of all the revision
    numbers (including duplicates) contained in REVNUM_TYPE_KEY values
    for all leaf nodes at and under NODE.

    REVNUM_TYPE_KEY should be either self.opening_key or
    self.closing_key."""
    contents = self.node_tree[node]

    # If the node has self.opening_key, it must be a leaf node--all
    # leaf nodes have at least an opening key (although they may not
    # have a closing key).  Fetch revnum and return.
    if (contents.has_key(self.opening_key) and
        contents.has_key(revnum_type_key)):
      return [contents[revnum_type_key]]

    revnums = []
    for key, child in contents.items():
      if key[0] == '/':
        # An opening/closing flag, not a path component.
        continue
      revnums.extend(self._list_revnums_for_key(child, revnum_type_key))
    return revnums

  def register(self, svn_path, svn_revnum, type):
    """Collects opening and closing revisions for this
    SymbolicNameFillingGuide.  SVN_PATH is the source path that needs
    to be copied into self.name, and SVN_REVNUM is either the
    first svn revision number that we can copy from (our opening), or
    the last (not inclusive) svn revision number that we can copy from
    (our closing).  TYPE indicates whether this path is an opening or
    a closing.

    The opening for a given SVN_PATH must be passed before the closing
    for it to have any effect... any closing encountered before a
    corresponding opening will be discarded.

    It is not necessary to pass a corresponding closing for every
    opening.
    """
    # Always log an OPENING.  A second opening for the same path
    # replaces the earlier record, including any closing stored in it.
    if type == OPENING:
      self.things[svn_path] = {self.opening_key: svn_revnum}
    # Only log a closing if we've already registered the opening for
    # that path.
    elif type == CLOSING and self.things.has_key(svn_path):
      # When we have a non-trunk default branch, we may have multiple
      # closings--only register the first closing we encounter.
      if not self.things[svn_path].has_key(self.closing_key):
        self.things[svn_path][self.closing_key] = svn_revnum

  def make_node_tree(self):
    """Generates the SymbolicNameFillingGuide's node tree from
    self.things.  Each leaf node maps self.opening_key to the earliest
    subversion revision from which this node/path may be copied; and
    optionally map self.closing_key to the subversion revision one
    higher than the last revision from which this node/path may be
    copied.  Intermediate nodes never contain opening or closing
    flags."""
    for svn_path, open_close in self.things.items():
      parent_key = self.root_key

      # Walk down the path from the root, one component at a time,
      # creating the nodes that do not exist yet.
      for component in svn_path.split('/'):
        siblings = self.node_tree[parent_key]
        if siblings.has_key(component):
          child_key = siblings[component]
        else:
          child_key = gen_key()
          self.node_tree[child_key] = { }
          siblings[component] = child_key
        parent_key = child_key

      # Having reached the leaf, attach the value
      self.node_tree[parent_key] = open_close

  def is_empty(self):
    """Return true if we haven't accumulated any openings or closings,
    false otherwise."""
    return not self.things
class FillSource:
  """A candidate source from which the symbol filler in
  SVNRepositoryMirror may copy."""

  def __init__(self, prefix, key):
    """Create a fill source for PREFIX and KEY.  It starts out
    unscored: both the score and the revnum are None."""
    self.prefix = prefix
    self.key = key
    self.revnum = None
    self.score = None

  def set_score(self, score, revnum):
    """Record SCORE and REVNUM on this fill source."""
    self.revnum = revnum
    self.score = score

  def __cmp__(self, other):
    """Order FillSources so that sorting puts the highest score
    first.  Raises TypeError if either side has not been scored."""
    mine = self.score
    theirs = other.score
    if mine is None or theirs is None:
      raise TypeError('Tried to compare unscored FillSource')
    # The operands are deliberately reversed: a higher score must
    # compare as "smaller" to come first in an ascending sort.
    if theirs < mine:
      return -1
    if theirs > mine:
      return 1
    return 0
2719 class SVNRepositoryMirror:
2720 """Mirror a Subversion Repository as it is constructed, one
2721 SVNCommit at a time. The mirror is skeletal; it does not contain
2722 file contents. The creation of a dumpfile or Subversion repository
2723 is handled by delegates. See self.add_delegate method for how to
2724 set delegates.
2726 The structure of the repository is kept in two databases and one
2727 hash. The revs_db database maps revisions to root node keys, and
2728 the nodes_db database maps node keys to nodes. A node is a hash
2729 from directory names to keys. Both the revs_db and the nodes_db are
2730 stored on disk and each access is expensive.
2732 The nodes_db database only has the keys for old revisions. The
2733 revision that is being contructed is kept in memory in the new_nodes
2734 hash which is cheap to access.
2736 You must invoke _start_commit between SVNCommits.
2738 *** WARNING *** All path arguments to methods in this class CANNOT
2739 have leading or trailing slashes.
2742 class SVNRepositoryMirrorPathExistsError(Exception):
2743 """Exception raised if an attempt is made to add a path to the
2744 repository mirror and that path already exists in the youngest
2745 revision of the repository."""
2746 pass
2748 class SVNRepositoryMirrorUnexpectedOperationError(Exception):
2749 """Exception raised if a CVSRevision is found to have an unexpected
2750 operation (OP) value."""
2751 pass
2753 class SVNRepositoryMirrorInvalidFillOperationError(Exception):
2754 """Exception raised if an empty SymbolicNameFillingGuide is returned
2755 during a fill where the branch in question already exists."""
2756 pass
2758 def __init__(self):
2759 """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
2760 self.delegates = [ ]
2762 # This corresponds to the 'revisions' table in a Subversion fs.
2763 self.revs_db = Database(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
2764 Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)
2766 # This corresponds to the 'nodes' table in a Subversion fs. (We
2767 # don't need a 'representations' or 'strings' table because we
2768 # only track metadata, not file contents.)
2769 self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
2770 Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)
2772 # Start at revision 0 without a root node. It will be created
2773 # by _open_writable_root_node.
2774 self.youngest = 0
2775 self.new_root_key = None
2776 self.new_nodes = { }
2778 if not Ctx().trunk_only:
2779 ###PERF IMPT: Suck this into memory.
2780 self.tags_db = TagsDatabase(DB_OPEN_READ)
2781 self.symbolings_reader = SymbolingsReader()
2783 def _initialize_repository(self, date):
2784 """Initialize the repository by creating the directories for
2785 trunk, tags, and branches. This method should only be called
2786 after all delegates are added to the repository mirror."""
2787 # Make a 'fake' SVNCommit so we can take advantage of the revprops
2788 # magic therein
2789 svn_commit = SVNCommit("Initialization", 1)
2790 svn_commit.set_date(date)
2791 svn_commit.set_log_msg("New repository initialized by cvs2svn.")
2793 self._start_commit(svn_commit)
2794 self._mkdir(Ctx().trunk_base)
2795 if not Ctx().trunk_only:
2796 self._mkdir(Ctx().branches_base)
2797 self._mkdir(Ctx().tags_base)
2799 def _start_commit(self, svn_commit):
2800 """Start a new commit."""
2801 if self.youngest > 0:
2802 self._end_commit()
2804 self.youngest = svn_commit.revnum
2805 self.new_root_key = None
2806 self.new_nodes = { }
2808 self._invoke_delegates('start_commit', svn_commit)
2810 def _end_commit(self):
2811 """Called at the end of each commit. This method copies the newly
2812 created nodes to the on-disk nodes db."""
2813 if self.new_root_key is None:
2814 # No changes were made in this revision, so we make the root node
2815 # of the new revision be the same as the last one.
2816 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
2817 else:
2818 self.revs_db[str(self.youngest)] = self.new_root_key
2819 # Copy the new nodes to the nodes_db
2820 for key, value in self.new_nodes.items():
2821 self.nodes_db[key] = value
2823 def _get_node(self, key):
2824 """Returns the node contents for KEY which may refer to either
2825 self.nodes_db or self.new_nodes."""
2826 if self.new_nodes.has_key(key):
2827 return self.new_nodes[key]
2828 else:
2829 return self.nodes_db[key]
2831 def _open_readonly_node(self, path, revnum):
2832 """Open a readonly node for PATH at revision REVNUM. Returns the
2833 node key and node contents if the path exists, else (None, None)."""
2834 # Get the root key
2835 if revnum == self.youngest:
2836 if self.new_root_key is None:
2837 node_key = self.revs_db[str(self.youngest - 1)]
2838 else:
2839 node_key = self.new_root_key
2840 else:
2841 node_key = self.revs_db[str(revnum)]
2843 for component in path.split('/'):
2844 node_contents = self._get_node(node_key)
2845 if not node_contents.has_key(component):
2846 return None
2847 node_key = node_contents[component]
2849 return node_key
2851 def _open_writable_root_node(self):
2852 """Open a writable root node. The current root node is returned
2853 immeditely if it is already writable. If not, create a new one by
2854 copying the contents of the root node of the previous version."""
2855 if self.new_root_key is not None:
2856 return self.new_root_key, self.new_nodes[self.new_root_key]
2858 if self.youngest < 2:
2859 new_contents = { }
2860 else:
2861 new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
2862 self.new_root_key = gen_key()
2863 self.new_nodes = { self.new_root_key: new_contents }
2865 return self.new_root_key, new_contents
2867 def _open_writable_node(self, svn_path, create):
2868 """Open a writable node for the path SVN_PATH, creating SVN_PATH
2869 and any missing directories if CREATE is True."""
2870 parent_key, parent_contents = self._open_writable_root_node()
2872 # Walk up the path, one node at a time.
2873 path_so_far = None
2874 components = svn_path.split('/')
2875 for i in range(len(components)):
2876 component = components[i]
2877 this_key = this_contents = None
2878 path_so_far = _path_join(path_so_far, component)
2879 if parent_contents.has_key(component):
2880 # The component exists.
2881 this_key = parent_contents[component]
2882 if self.new_nodes.has_key(this_key):
2883 this_contents = self.new_nodes[this_key]
2884 else:
2885 # Suck the node from the nodes_db, but update the key
2886 this_contents = self.nodes_db[this_key]
2887 this_key = gen_key()
2888 self.new_nodes[this_key] = this_contents
2889 parent_contents[component] = this_key
2890 elif create:
2891 # The component does not exists, so we create it.
2892 this_contents = { }
2893 this_key = gen_key()
2894 self.new_nodes[this_key] = this_contents
2895 parent_contents[component] = this_key
2896 if i < len(components) - 1:
2897 self._invoke_delegates('mkdir', path_so_far)
2898 else:
2899 # The component does not exists and we are not instructed to
2900 # create it, so we give up.
2901 return None, None
2903 parent_key = this_key
2904 parent_contents = this_contents
2906 return this_key, this_contents
2908 def _path_exists(self, path):
2909 """If PATH exists in self.youngest of the svn repository mirror,
2910 return true, else return None.
2912 PATH must not start with '/'."""
2913 return self._open_readonly_node(path, self.youngest) is not None
2915 def _fast_delete_path(self, parent_path, parent_contents, component):
2916 """Delete COMPONENT from the parent direcory PARENT_PATH with the
2917 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
2918 in PARENT_CONTENTS."""
2919 if parent_contents.has_key(component):
2920 del parent_contents[component]
2921 self._invoke_delegates('delete_path', _path_join(parent_path, component))
2923 def _delete_path(self, svn_path, should_prune=False):
2924 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
2925 all ancestor directories that are made empty when SVN_PATH is deleted.
2926 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
2928 NOTE: This function does *not* allow you delete top-level entries
2929 (like /trunk, /branches, /tags), nor does it prune upwards beyond
2930 those entries."""
2931 pos = svn_path.rfind('/')
2932 parent_path = svn_path[:pos]
2933 entry = svn_path[pos+1:]
2934 parent_key, parent_contents = self._open_writable_node(parent_path, False)
2935 if parent_key is not None:
2936 self._fast_delete_path(parent_path, parent_contents, entry)
2937 # The following recursion makes pruning an O(n^2) operation in the
2938 # worst case (where n is the depth of SVN_PATH), but the worst case
2939 # is probably rare, and the constant cost is pretty low. Another
2940 # drawback is that we issue a delete for each path and not just
2941 # a single delete for the topmost directory pruned.
2942 if (should_prune and len(parent_contents) == 0 and
2943 parent_path.find('/') != -1):
2944 self._delete_path(parent_path, True)
2946 def _mkdir(self, path):
2947 """Create PATH in the repository mirror at the youngest revision."""
2948 self._open_writable_node(path, True)
2949 self._invoke_delegates('mkdir', path)
2951 def _change_path(self, cvs_rev):
2952 """Register a change in self.youngest for the CVS_REV's svn_path
2953 in the repository mirror."""
2954 # We do not have to update the nodes because our mirror is only
2955 # concerned with the presence or absence of paths, and a file
2956 # content change does not cause any path changes.
2957 self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, 0))
2959 def _add_path(self, cvs_rev):
2960 """Add the CVS_REV's svn_path to the repository mirror."""
2961 self._open_writable_node(cvs_rev.svn_path, True)
2962 self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, 1))
2964 def _copy_path(self, src_path, dest_path, src_revnum):
2965 """Copy SRC_PATH at subversion revision number SRC_REVNUM to
2966 DEST_PATH. In the youngest revision of the repository, DEST_PATH's
2967 parent *must* exist, but DEST_PATH *cannot* exist.
2969 Return the node key and the contents of the new node at DEST_PATH
2970 as a dictionary."""
2971 # get the contents of the node of our src_path
2972 src_key = self._open_readonly_node(src_path, src_revnum)
2973 src_contents = self._get_node(src_key)
2975 # Get the parent path and the base path of the dest_path
2976 pos = dest_path.rindex('/')
2977 dest_parent = dest_path[:pos]
2978 dest_basename = dest_path[pos+1:]
2979 dest_parent_key, dest_parent_contents = \
2980 self._open_writable_node(dest_parent, False)
2982 if dest_parent_contents.has_key(dest_basename):
2983 msg = "Attempt to add path '%s' to repository mirror " % dest_path
2984 msg = msg + "when it already exists in the mirror."
2985 raise self.SVNRepositoryMirrorPathExistsError, msg
2987 dest_parent_contents[dest_basename] = src_key
2988 self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)
2990 # Yes sir, src_key and src_contents are also the contents of the
2991 # destination. This is a cheap copy, remember! :-)
2992 return src_key, src_contents
2994 def _fill_symbolic_name(self, svn_commit):
2995 """Performs all copies necessary to create as much of the the tag
2996 or branch SVN_COMMIT.symbolic_name as possible given the current
2997 revision of the repository mirror.
2999 The symbolic name is guaranteed to exist in the Subversion
3000 repository by the end of this call, even if there are no paths
3001 under it."""
3002 symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
3003 svn_commit.symbolic_name, self.youngest)
3005 # Create the list of sources for the symbolic name. All source
3006 # prefixes must be direct sources for the destination, i.e. we
3007 # must have 'trunk' and 'branches/my_branch' and not just
3008 # 'branches'.
3009 sources = []
3010 for entry, key in symbol_fill.node_tree[symbol_fill.root_key].items():
3011 if entry == Ctx().trunk_base:
3012 sources.append(FillSource(entry, key))
3013 elif entry == Ctx().branches_base:
3014 for entry2, key2 in symbol_fill.node_tree[key].items():
3015 sources.append(FillSource(entry + '/' + entry2, key2))
3016 else:
3017 raise # Should never happen
3018 if self.tags_db.has_key(svn_commit.symbolic_name):
3019 dest_prefix = _path_join(Ctx().tags_base, svn_commit.symbolic_name)
3020 else:
3021 dest_prefix = _path_join(Ctx().branches_base,
3022 svn_commit.symbolic_name)
3024 if sources:
3025 dest_key = self._open_writable_node(dest_prefix, False)[0]
3026 self._fill(symbol_fill, dest_prefix, dest_key, sources)
3027 else:
3028 # We can only get here for a branch whose first commit is an add
3029 # (as opposed to a copy).
3030 dest_path = Ctx().branches_base + '/' + symbol_fill.name
3031 if not self._path_exists(dest_path):
3032 # If our symbol_fill was empty, that means that our first
3033 # commit on the branch was to a file added on the branch, and
3034 # that this is our first fill of that branch.
3036 # This case is covered by test 16.
3038 # ...we create the branch by copying trunk from the our
3039 # current revision number minus 1
3040 source_path = Ctx().trunk_base
3041 entries = self._copy_path(source_path, dest_path,
3042 svn_commit.revnum - 1)[1]
3043 # Now since we've just copied trunk to a branch that's
3044 # *supposed* to be empty, we delete any entries in the
3045 # copied directory.
3046 for entry in entries.keys():
3047 del_path = dest_path + '/' + entry
3048 # Delete but don't prune.
3049 self._delete_path(del_path)
3050 else:
3051 msg = "Error filling branch '" + symbol_fill.name + "'.\n"
3052 msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
3053 msg = msg + "attempted to create a branch that already exists."
3054 raise self.SVNRepositoryMirrorInvalidFillOperationError, msg
3056 def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
3057 path = None, parent_source_prefix = None,
3058 preferred_revnum = None, prune_ok = None):
3059 """Fill the tag or branch at DEST_PREFIX + PATH with items from
3060 SOURCES, and recurse into the child items.
3062 DEST_PREFIX is the prefix of the destination directory, e.g.
3063 '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
3064 FillSource classes that are candidates to be copied to the
3065 destination. DEST_KEY is the key in self.nodes_db to the
3066 destination, or None if the destination does not yet exist.
3068 PATH is the path relative to DEST_PREFIX. If PATH is None, we
3069 are at the top level, e.g. '/tags/my_tag'.
3071 PARENT_SOURCE_PREFIX is the source prefix that was used to copy
3072 the parent directory, and PREFERRED_REVNUM is an int which is the
3073 source revision number that the caller (who may have copied KEY's
3074 parent) used to perform its copy. If PREFERRED_REVNUM is None,
3075 then no revision is preferable to any other (which probably means
3076 that no copies have happened yet).
3078 PRUNE_OK means that a copy has been made in this recursion, and
3079 it's safe to prune directories that are not in
3080 SYMBOL_FILL.node_tree, provided that said directory has a source
3081 prefix of one of the PARENT_SOURCE_PREFIX.
3083 PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
3084 should only be passed in by recursive calls."""
3085 # Calculate scores and revnums for all sources
3086 for source in sources:
3087 src_revnum, score = symbol_fill.get_best_revnum(source.key,
3088 preferred_revnum)
3089 source.set_score(score, src_revnum)
3091 # Sort the sources in descending score order so that we will make
3092 # a eventual copy from the source with the highest score.
3093 sources.sort()
3094 copy_source = sources[0]
3096 src_path = _path_join(copy_source.prefix, path)
3097 dest_path = _path_join(dest_prefix, path)
3099 # Figure out if we shall copy to this destination and delete any
3100 # destination path that is in the way.
3101 do_copy = 0
3102 if dest_key is None:
3103 do_copy = 1
3104 elif prune_ok and (parent_source_prefix != copy_source.prefix or
3105 copy_source.revnum != preferred_revnum):
3106 # We are about to replace the destination, so we need to remove
3107 # it before we perform the copy.
3108 self._delete_path(dest_path)
3109 do_copy = 1
3111 if do_copy:
3112 dest_key, dest_entries = self._copy_path(src_path, dest_path,
3113 copy_source.revnum)
3114 prune_ok = 1
3115 else:
3116 dest_entries = self._get_node(dest_key)
3118 # Create the SRC_ENTRIES hash from SOURCES. The keys are path
3119 # elements and the values are lists of FillSource classes where
3120 # this path element exists.
3121 src_entries = {}
3122 for source in sources:
3123 for entry, key in symbol_fill.node_tree[source.key].items():
3124 if entry[0] == '/': # Skip flags
3125 continue
3126 if not src_entries.has_key(entry):
3127 src_entries[entry] = []
3128 src_entries[entry].append(FillSource(source.prefix, key))
3130 if prune_ok:
3131 # Delete the entries in DEST_ENTRIES that are not in src_entries.
3132 delete_list = [ ]
3133 for entry in dest_entries.keys():
3134 if not src_entries.has_key(entry):
3135 delete_list.append(entry)
3136 if delete_list:
3137 if not self.new_nodes.has_key(dest_key):
3138 dest_key, dest_entries = self._open_writable_node(dest_path, True)
3139 # Sort the delete list to get "diffable" dumpfiles.
3140 delete_list.sort()
3141 for entry in delete_list:
3142 self._fast_delete_path(dest_path, dest_entries, entry)
3144 # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
3145 src_keys = src_entries.keys()
3146 src_keys.sort()
3147 for src_key in src_keys:
3148 if dest_entries.has_key(src_key):
3149 next_dest_key = dest_entries[src_key]
3150 else:
3151 next_dest_key = None
3152 self._fill(symbol_fill, dest_prefix, next_dest_key,
3153 src_entries[src_key], _path_join(path, src_key),
3154 copy_source.prefix, sources[0].revnum, prune_ok)
3156 def _synchronize_default_branch(self, svn_commit):
3157 """Propagate any changes that happened on a non-trunk default
3158 branch to the trunk of the repository. See
3159 CVSCommit._post_commit() for details on why this is necessary."""
3160 for cvs_rev in svn_commit.cvs_revs:
3161 if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
3162 if self._path_exists(cvs_rev.svn_trunk_path):
3163 # Delete the path on trunk...
3164 self._delete_path(cvs_rev.svn_trunk_path)
3165 # ...and copy over from branch
3166 self._copy_path(cvs_rev.svn_path, cvs_rev.svn_trunk_path,
3167 svn_commit.motivating_revnum)
3168 elif cvs_rev.op == OP_DELETE:
3169 # delete trunk path
3170 self._delete_path(cvs_rev.svn_trunk_path)
3171 else:
3172 msg = ("Unknown CVSRevision operation '%s' in default branch sync."
3173 % cvs_rev.op)
3174 raise self.SVNRepositoryMirrorUnexpectedOperationError, msg
3176 def commit(self, svn_commit):
3177 """Add an SVNCommit to the SVNRepository, incrementing the
3178 Repository revision number, and changing the repository. Invoke
3179 the delegates' _start_commit() method."""
3181 if svn_commit.revnum == 2:
3182 self._initialize_repository(svn_commit.get_date())
3184 self._start_commit(svn_commit)
3186 if svn_commit.symbolic_name:
3187 Log().write(LOG_VERBOSE, "Filling symbolic name:",
3188 svn_commit.symbolic_name)
3189 self._fill_symbolic_name(svn_commit)
3190 elif svn_commit.motivating_revnum:
3191 Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
3192 % svn_commit.motivating_revnum)
3193 self._synchronize_default_branch(svn_commit)
3194 else: # This actually commits CVSRevisions
3195 if len(svn_commit.cvs_revs) > 1: plural = "s"
3196 else: plural = ""
3197 Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
3198 % (len(svn_commit.cvs_revs), plural))
3199 for cvs_rev in svn_commit.cvs_revs:
3200 # See comment in CVSCommit._commit() for what this is all
3201 # about. Note that although asking self._path_exists() is
3202 # somewhat expensive, we only do it if the first two (cheap)
3203 # tests succeed first.
3204 if not ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
3205 and (cvs_rev.rev == "1.1.1.1")
3206 and self._path_exists(cvs_rev.svn_path)):
3207 if cvs_rev.op == OP_ADD:
3208 self._add_path(cvs_rev)
3209 elif cvs_rev.op == OP_CHANGE:
3210 # Fix for Issue #74:
3212 # Here's the scenario. You have file FOO that is imported
3213 # on a non-trunk vendor branch. So in r1.1 and r1.1.1.1,
3214 # the file exists.
3216 # Moving forward in time, FOO is deleted on the default
3217 # branch (r1.1.1.2). cvs2svn determines that this delete
3218 # also needs to happen on trunk, so FOO is deleted on
3219 # trunk.
3221 # Along come r1.2, whose op is OP_CHANGE (because r1.1 is
3222 # not 'dead', we assume it's a change). However, since
3223 # our trunk file has been deleted, svnadmin blows up--you
3224 # can't change a file that doesn't exist!
3226 # Soooo... we just check the path, and if it doesn't
3227 # exist, we do an add... if the path does exist, it's
3228 # business as usual.
3229 if not self._path_exists(cvs_rev.svn_path):
3230 self._add_path(cvs_rev)
3231 else:
3232 self._change_path(cvs_rev)
3234 if cvs_rev.op == OP_DELETE:
3235 self._delete_path(cvs_rev.svn_path, Ctx().prune)
def cleanup(self):
  """Release the references this mirror holds to its revs and nodes
  databases.  This is the callback passed to Cleanup.register in
  self.__init__."""
  self.revs_db = self.nodes_db = None
def add_delegate(self, delegate):
  """Register DELEGATE at the end of self.delegates.

  Whenever SVNRepositoryMirror carries out a repository action
  method, it calls the corresponding repository action method of
  every registered delegate, in the order in which the delegates
  were added.  See SVNRepositoryMirrorDelegate for the interface a
  delegate provides."""
  registered = self.delegates
  registered.append(delegate)
def _invoke_delegates(self, method, *args):
  """Call the method named METHOD, with the arguments in ARGS, on
  every delegate in self.delegates, in the order in which the
  delegates were added."""
  for listener in self.delegates:
    action = getattr(listener, method)
    action(*args)
def finish(self):
  """Wind up the mirror.

  Calls self._end_commit(), then the 'finish' method of every
  delegate (in the order the delegates were added), and finally
  self.cleanup(), which drops our references to the revs and nodes
  databases."""
  # _end_commit() runs before the delegates are told to finish.
  # NOTE(review): presumably so that the last commit is closed out
  # before any delegate finalizes its output -- confirm against
  # _end_commit().
  self._end_commit()
  self._invoke_delegates('finish')
  self.cleanup()
class SVNCommitItem:
  """Couples a CVSRevision object with the Subversion-related data
  (such as properties) that is to travel along with it."""

  def __init__(self, c_rev, make_svn_props):
    """Wrap the CVSRevision C_REV.

    self.svn_props is always created.  It receives the
    'cvs2svn:cvs-rev' property when Ctx().cvs_revnums is set, and the
    'svn:' properties when MAKE_SVN_PROPS is true or when
    Ctx().cvs_revnums is set (which forces MAKE_SVN_PROPS on)."""
    self.c_rev = c_rev
    self.set_cvs_revnum_properties = Ctx().cvs_revnums
    self.eol_from_mime_type = Ctx().eol_from_mime_type
    self.no_default_eol = Ctx().no_default_eol
    self.keywords_off = Ctx().keywords_off
    self.mime_mapper = Ctx().mime_mapper

    # The property set starts out holding, at most, the "CVS revision"
    # property.
    props = { }
    self.svn_props = props
    if self.set_cvs_revnum_properties:
      props['cvs2svn:cvs-rev'] = c_rev.rev
      make_svn_props = True

    # Unless the Subversion properties ('svn:' ones) were asked for,
    # there is nothing left to do.
    if not make_svn_props:
      return

    # Executableness, if any.
    if c_rev.file_executable:
      props['svn:executable'] = '*'

    # svn:keywords, if appropriate.  See issue #2.
    if c_rev.mode in (None, 'kv', 'kvl') and not self.keywords_off:
      props['svn:keywords'] = 'Author Date Id Revision'

    # Mime-type and eol.  These two properties are intertwingled;
    # follow the conditionals carefully.  See also issue #39.
    mime_type = None
    eol_style = None

    if self.mime_mapper:
      mime_type = self.mime_mapper.get_type_from_filename(c_rev.cvs_path)

    if c_rev.mode == 'b':
      if mime_type is None:
        # file is kb, and no other mimetype specified
        mime_type = 'application/octet-stream'
    elif not self.no_default_eol:
      eol_style = 'native'
    elif (mime_type and self.eol_from_mime_type
          and mime_type.startswith("text/")):
      eol_style = 'native'

    if mime_type:
      props['svn:mime-type'] = mime_type

    if eol_style:
      props['svn:eol-style'] = eol_style
class SVNRepositoryMirrorDelegate:
  """Abstract base class for the delegates of SVNRepositoryMirror.
  A subclass has to provide every one of the methods listed here.

  Each method stands for the Subversion operation its name implies,
  and each subclass carries that operation out in its own way.  Take
  add_path: the DumpfileDelegate would write out a "Node-add:"
  command to a Subversion dumpfile, the StdoutDelegate would merely
  print that the path is being added to the repository, and the
  RepositoryDelegate would actually cause the path to be added to the
  Subversion repository that it is creating.
  """

  def start_commit(self, svn_commit):
    """Do whatever is needed to begin the SVNCommit SVN_COMMIT; the
    subclass implementation has the details."""
    raise NotImplementedError()

  def mkdir(self, path):
    """PATH is a string; the subclass implementation has the
    details."""
    raise NotImplementedError()

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; the subclass implementation has the
    details."""
    raise NotImplementedError()

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; the subclass implementation has the
    details."""
    raise NotImplementedError()

  def delete_path(self, path):
    """PATH is a string; the subclass implementation has the
    details."""
    raise NotImplementedError()

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are strings and SRC_REVNUM is a
    subversion revision number (int); the subclass implementation has
    the details."""
    raise NotImplementedError()

  def finish(self):
    """Do whatever cleanup is needed once all revisions have been
    committed."""
    raise NotImplementedError()
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
  """Create a Subversion dumpfile."""

  def __init__(self, dumpfile_path=None):
    """Return a new DumpfileDelegate instance, attached to a dumpfile
    DUMPFILE_PATH (Ctx().dumpfile, if None), using Ctx().encoding.

    The dumpfile is opened for binary writing and the dumpfile format
    header is written to it immediately.

    The file properties written by add_path() and change_path() are
    taken as-is from the svn_props of the SVNCommitItem they are
    given; SVNCommitItem derives them from the following settings:

    If Ctx().cvs_revnums is true, then set the 'cvs2svn:cvs-rev'
    property on files, when they are changed due to a corresponding
    CVS revision.

    If Ctx().mime_mapper is not None, then it is a MimeMapper
    instance, used to determine whether or not to set the
    'svn:mime-type' property on files.  But even if Ctx().mime_mapper
    is None, files marked with the CVS 'kb' flag will receive a mime
    type of "application/octet-stream".

    Unless Ctx().no_default_eol is true, set 'svn:eol-style' to
    'native' for files not marked with the CVS 'kb' flag, except as
    superseded by Ctx().eol_from_mime_type (see below).

    If Ctx().eol_from_mime_type is true, then set 'svn:eol-style'
    to 'native' for all files to which Ctx().mime_mapper assigns a
    mime type beginning with "text/", and don't set 'svn:eol-style'
    for files assigned a type not beginning with "text/".
    """
    if dumpfile_path:
      self.dumpfile_path = dumpfile_path
    else:
      self.dumpfile_path = Ctx().dumpfile
    self.path_encoding = Ctx().encoding

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)

  def _write_dumpfile_header(self, dumpfile):
    """Write the standard dumpfile format header to the file-like
    object DUMPFILE."""
    # Initialize the dumpfile with the standard headers.
    #
    # Since the CVS repository doesn't have a UUID, and the Subversion
    # repository will be created with one anyway, we don't specify a
    # UUID in the dumpfile
    dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

  def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8.  PATH is assumed to be
    encoded in self.path_encoding.  If PATH cannot be decoded with
    that encoding, report the problem on stderr and exit with
    status 1."""
    try:
      # Log messages can be converted with the 'replace' strategy,
      # but we can't afford any lossiness here.
      unicode_path = unicode(path, self.path_encoding, 'strict')
      return unicode_path.encode('utf-8')
    except UnicodeError:
      # This is a fatal error, so it goes to stderr with the standard
      # error prefix, like the other errors reported by this class.
      sys.stderr.write("%s: Unable to convert a path '%s' to internal "
                       "encoding.\n"
                       "Consider rerunning with (for example) "
                       "'--encoding=latin1'\n" % (error_prefix, path))
      sys.exit(1)

  def start_commit(self, svn_commit):
    """Emit the start of SVN_COMMIT (an SVNCommit)."""

    self.revision = svn_commit.revnum

    # The start of a new commit typically looks like this:
    #
    #   Revision-number: 1
    #   Prop-content-length: 129
    #   Content-length: 129
    #
    #   K 7
    #   svn:log
    #   V 27
    #   Log message for revision 1.
    #   K 10
    #   svn:author
    #   V 7
    #   jrandom
    #   K 8
    #   svn:date
    #   V 27
    #   2003-04-22T22:57:58.132837Z
    #   PROPS-END
    #
    # Notice that the length headers count everything -- not just the
    # length of the data but also the lengths of the lengths, including
    # the 'K ' or 'V ' prefixes.
    #
    # The reason there are both Prop-content-length and Content-length
    # is that the former includes just props, while the latter includes
    # everything.  That's the generic header form for any entity in a
    # dumpfile.  But since revisions only have props, the two lengths
    # are always the same for revisions.

    # Build the whole props section (sorted by name, skipping props
    # whose value is None) so that its length can simply be measured
    # instead of being tallied by hand.
    props = svn_commit.get_revprops()
    prop_names = props.keys()
    prop_names.sort()
    prop_chunks = [ ]
    for propname in prop_names:
      propval = props[propname]
      if propval is None:
        continue
      prop_chunks.append('K %d\n'
                         '%s\n'
                         'V %d\n'
                         '%s\n' % (len(propname),
                                   propname,
                                   len(propval),
                                   propval))
    prop_chunks.append('PROPS-END\n')
    prop_contents = ''.join(prop_chunks)
    total_len = len(prop_contents)

    # Print the revision header and props
    self.dumpfile.write('Revision-number: %d\n'
                        'Prop-content-length: %d\n'
                        'Content-length: %d\n'
                        '\n'
                        % (self.revision, total_len, total_len))
    self.dumpfile.write(prop_contents)
    self.dumpfile.write('\n')

  def mkdir(self, path):
    """Emit the creation of directory PATH."""
    # No properties are set on a new directory, so the record has no
    # body and therefore carries no length headers at all (just like
    # the records written by delete_path() and copy_path()).  A
    # "Content-length:" header here would promise content bytes that
    # are never written.
    self.dumpfile.write("Node-path: %s\n"
                        "Node-kind: dir\n"
                        "Node-action: add\n"
                        "\n"
                        "\n" % self._utf8_path(path))

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.
    OP is either the constant OP_ADD or OP_CHANGE.

    The file contents are read from the pipe returned by
    get_co_pipe(); if that command exits with a non-zero status, exit
    with an error message.  Any other value of OP is reported on
    stderr and the program exits with status 1."""

    # Validation stuffs
    if op == OP_ADD:
      action = 'add'
    elif op == OP_CHANGE:
      action = 'change'
    else:
      sys.stderr.write("%s: _add_or_change_path() called with bad op ('%s')\n"
                       % (error_prefix, op))
      sys.exit(1)

    # Convenience variables
    c_rev = s_item.c_rev
    svn_props = s_item.svn_props

    # The property handling here takes advantage of an undocumented
    # but IMHO consistent feature of the Subversion dumpfile-loading
    # code.  When a node's properties aren't mentioned (that is, the
    # "Prop-content-length:" header is absent, no properties are
    # listed at all, and there is no "PROPS-END\n" line) then no
    # change is made to the node's properties.
    #
    # This is consistent with the way dumpfiles behave w.r.t. text
    # content changes, so I'm comfortable relying on it.  If you
    # commit a change to *just* the properties of some node that
    # already has text contents from a previous revision, then in the
    # dumpfile output for the prop change, no "Text-content-length:"
    # nor "Text-content-md5:" header will be present, and the text of
    # the file will not be given.  But this does not cause the file's
    # text to be erased!  It simply remains unchanged.
    #
    # This works out great for cvs2svn, due to lucky coincidences:
    #
    # For files, the only properties we ever set are set in the first
    # revision; all other revisions (including on branches) inherit
    # from that.  After the first revision, we never change file
    # properties, therefore, there is no need to remember the full set
    # of properties on a given file once we've set it.
    #
    # For directories, the only property we set is "svn:ignore", and
    # while we may change it after the first revision, we always do so
    # based on the contents of a ".cvsignore" file -- in other words,
    # CVS is doing the remembering for us, so we still don't have to
    # preserve the previous value of the property ourselves.

    # Calculate the (sorted-by-name) property string and length, if any.
    prop_contents = ''
    prop_names = svn_props.keys()
    prop_names.sort()
    for pname in prop_names:
      pval = svn_props[pname]
      prop_contents = prop_contents + \
                      'K %d\n%s\nV %d\n%s\n' \
                      % (len(pname), pname, len(pval), pval)
    if prop_contents:
      prop_contents = prop_contents + 'PROPS-END\n'
      props_len = len(prop_contents)
    else:
      props_len = 0

    props_header = ''
    if props_len:
      props_header = 'Prop-content-length: %d\n' % props_len

    # treat .cvsignore as a directory property
    dir_path, basename = os.path.split(c_rev.svn_path)
    if basename == ".cvsignore":
      ignore_vals = generate_ignores(c_rev)
      ignore_contents = '\n'.join(ignore_vals)
      ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
                         (len(ignore_contents), ignore_contents))
      ignore_contents = ignore_contents + 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # write headers, then props
      self.dumpfile.write('Node-path: %s\n'
                          'Node-kind: dir\n'
                          'Node-action: change\n'
                          'Prop-content-length: %d\n'
                          'Content-length: %d\n'
                          '\n'
                          '%s'
                          % (self._utf8_path(dir_path), ignore_len,
                             ignore_len, ignore_contents))

    pipe_cmd, pipe = get_co_pipe(c_rev)
    self.dumpfile.write('Node-path: %s\n'
                        'Node-kind: file\n'
                        'Node-action: %s\n'
                        '%s'  # no property header if no props
                        'Text-content-length: '
                        % (self._utf8_path(c_rev.svn_path),
                           action, props_header))

    # Remember where the placeholders start; they are patched up below
    # once the real length and checksum are known.
    pos = self.dumpfile.tell()

    self.dumpfile.write('0000000000000000\n'
                        'Text-content-md5: 00000000000000000000000000000000\n'
                        'Content-length: 0000000000000000\n'
                        '\n')

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Insert the rev contents, calculating length and checksum as we go.
    checksum = md5.new()
    length = 0
    normalize_crlf = sys.platform == "win32" \
                     and svn_props.has_key('svn:eol-style')
    trailing_cr = ""
    while 1:
      data = pipe.fromchild.read(PIPE_READ_SIZE)
      # Put back any CR that was held over from the previous chunk.
      buf = trailing_cr + data
      trailing_cr = ""
      if not buf:
        break
      if normalize_crlf:
        buf = string.replace(buf, "\r\n", "\n")
        # A CR at the very end of a chunk may be the first half of a
        # CRLF pair that was split across two reads, so hold it over
        # until the next chunk has been read.  At end of input (DATA is
        # empty) nothing more can follow: the CR is a bare one and is
        # written out as is.  Holding it over at that point would make
        # this loop spin forever.
        if data and buf[-1] == "\r":
          trailing_cr = "\r"
          buf = buf[:-1]
      checksum.update(buf)
      length = length + len(buf)
      self.dumpfile.write(buf)
      if not data:
        break
    pipe.fromchild.close()
    error_output = pipe.childerr.read()
    exit_status = pipe.wait()
    if exit_status:
      sys.exit("%s: The command '%s' failed with exit status: %s\n"
               "and the following output:\n"
               "%s" % (error_prefix, pipe_cmd, exit_status, error_output))

    # Go back to patch up the length and checksum headers:
    self.dumpfile.seek(pos, 0)
    # We left 16 zeros for the text length; replace them with the real
    # length, padded on the left with spaces:
    self.dumpfile.write('%16d' % length)
    # 16... + 1 newline + len('Text-content-md5: ') == 35
    self.dumpfile.seek(pos + 35, 0)
    self.dumpfile.write(checksum.hexdigest())
    # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
    self.dumpfile.seek(pos + 84, 0)
    # The content length is the length of property data, text data,
    # and any metadata around/inside around them.
    self.dumpfile.write('%16d' % (length + props_len))
    # Jump back to the end of the stream
    self.dumpfile.seek(0, 2)

    # This record is done (write two newlines -- one to terminate
    # contents that weren't themselves newline-termination, one to
    # provide a blank line for readability.
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_path(self, path):
    """Emit the deletion of PATH."""
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: delete\n'
                        '\n' % self._utf8_path(path))

  def copy_path(self, src_path, dest_path, src_revnum):
    """Emit the copying of SRC_PATH at SRC_REVNUM to DEST_PATH."""
    # We don't need to include "Node-kind:" for copies; the loader
    # ignores it anyway and just uses the source kind instead.
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: add\n'
                        'Node-copyfrom-rev: %d\n'
                        'Node-copyfrom-path: /%s\n'
                        '\n'
                        % (self._utf8_path(dest_path),
                           src_revnum,
                           self._utf8_path(src_path)))

  def finish(self):
    """Perform any cleanup necessary after all revisions have been
    committed; here, that means closing the dumpfile."""
    self.dumpfile.close()
class RepositoryDelegate(DumpfileDelegate):
  """Creates a new Subversion Repository.  DumpfileDelegate does all
  of the heavy lifting.

  Each commit is first written to a temporary dumpfile by the
  inherited DumpfileDelegate methods; the dumpfile is then piped into
  a single, long-running 'svnadmin load' process when the next commit
  starts (or when finish() is called)."""
  def __init__(self):
    """Create the target repository (unless Ctx().existing_svnrepos
    is set), open the temporary dumpfile, and start the
    'svnadmin load' process that the commits will be fed to."""
    self.svnadmin = Ctx().svnadmin
    self.target = Ctx().target
    if not Ctx().existing_svnrepos:
      Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
      if Ctx().fs_type and Ctx().fs_type != 'bdb':
        # User specified something other than bdb.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--fs-type=%s" % Ctx().fs_type,
                                           self.target))
      elif Ctx().fs_type:
        # User explicitly specified bdb.
        #
        # Since this is a BDB repository, pass --bdb-txn-nosync,
        # because it gives us a 4-5x speed boost (if cvs2svn is
        # creating the repository, cvs2svn should be the only program
        # accessing the svn repository (until cvs is done, at least)).
        # But we'll turn no-sync off in self.finish(), unless
        # instructed otherwise.
        run_command('%s create %s %s "%s"' % (self.svnadmin,
                                              "--fs-type=bdb",
                                              "--bdb-txn-nosync",
                                              self.target))
      else:
        # User didn't say what kind repository (bdb, fsfs, etc).
        # We still pass --bdb-txn-nosync.  It's a no-op if the default
        # repository type doesn't support it, but we definitely want
        # it if BDB is the default.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--bdb-txn-nosync",
                                           self.target))

    # Since the output of this run is a repository, not a dumpfile,
    # the temporary dumpfiles we create should go in the tmpdir.
    DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

    # This is 1 if a commit is in progress, otherwise None.
    self._commit_in_progress = None

    # DumpfileDelegate.__init__ has opened the dumpfile write-only and
    # written the format header to it.  We need a handle that can also
    # be read back (see _feed_pipe), and the header goes straight to
    # the loader pipe instead.  Close the inherited handle explicitly
    # before re-opening: otherwise its buffered header would only be
    # flushed whenever the abandoned file object happened to be
    # finalized, i.e. into the file we have just truncated.
    self.dumpfile.close()
    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = Popen3('%s load -q "%s"' % (self.svnadmin, self.target),
                              True)
    self.loader_pipe.fromchild.close()
    try:
      self._write_dumpfile_header(self.loader_pipe.tochild)
    except IOError:
      self._report_loader_failure()

  def _report_loader_failure(self):
    """Copy whatever 'svnadmin load' wrote to its stderr to our own
    stderr, then exit with status 1.  Called when writing to the
    loader pipe raises IOError."""
    sys.stderr.write("%s: svnadmin failed with the following output while "
                     "loading the dumpfile:\n" % (error_prefix))
    sys.stderr.write(self.loader_pipe.childerr.read())
    sys.exit(1)

  def _feed_pipe(self):
    """Feed the revision stored in the dumpfile to the svnadmin
    load pipe."""
    self.dumpfile.seek(0)
    while 1:
      data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
      if not len(data):
        break
      try:
        self.loader_pipe.tochild.write(data)
      except IOError:
        self._report_loader_failure()

  def start_commit(self, svn_commit):
    """Start a new commit.  If a commit is already in progress, load
    the dumpfile holding it into the svn repository and empty the
    dumpfile, then write the start of SVN_COMMIT into it."""
    if self._commit_in_progress:
      self._feed_pipe()
    self.dumpfile.seek(0)
    self.dumpfile.truncate()
    DumpfileDelegate.start_commit(self, svn_commit)
    self._commit_in_progress = 1

  def finish(self):
    """Loads the last commit into the repository, waits for
    'svnadmin load' to finish (exiting with an error message if it
    failed), and removes the temporary dumpfile."""
    self._feed_pipe()
    self.dumpfile.close()
    self.loader_pipe.tochild.close()
    error_output = self.loader_pipe.childerr.read()
    exit_status = self.loader_pipe.wait()
    if exit_status:
      sys.exit('%s: svnadmin load failed with exit status: %s\n'
               'and the following output:\n'
               '%s' % (error_prefix, exit_status, error_output))
    os.remove(self.dumpfile_path)

    # If this is a BDB repository, and we created the repository, and
    # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
    # line in the DB_CONFIG file, because txn syncing should be on by
    # default in BDB repositories.
    #
    # We determine if this is a BDB repository by looking for the
    # DB_CONFIG file, which doesn't exist in FSFS, rather than by
    # checking Ctx().fs_type.  That way this code will Do The Right
    # Thing in all circumstances.
    db_config = os.path.join(self.target, "db/DB_CONFIG")
    if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
        and os.path.exists(db_config)):
      no_sync = 'set_flags DB_TXN_NOSYNC\n'

      config_file = open(db_config, 'r')
      contents = config_file.readlines()
      config_file.close()
      try:
        index = contents.index(no_sync)
      except ValueError:
        # The flag is not set in this DB_CONFIG, so syncing is already
        # in the state we want; there is nothing to comment out, and
        # certainly no reason to die at the very end of a conversion.
        index = None
      if index is not None:
        contents[index] = '# ' + no_sync
        config_file = open(db_config, 'w')
        config_file.writelines(contents)
        config_file.close()
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """A delegate that leaves the disk alone and merely reports, through
  Log(), what the SVNRepositoryMirror is doing.  Of course, our
  messages will state that we're doing something, when in reality, we
  aren't doing anything other than reporting that we're doing
  something.  Kind of zen, really."""
  def __init__(self, total_revs):
    """TOTAL_REVS is the total shown in the 'N / TOTAL' progress
    message of start_commit()."""
    self.total_revs = total_revs

  def start_commit(self, svn_commit):
    """Report the Subversion revision number of the commit that is
    being started."""
    separator = "=" * 60
    Log().write(LOG_VERBOSE, separator)
    progress = "Starting Subversion commit %d / %d" % (svn_commit.revnum,
                                                        self.total_revs)
    Log().write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Report that directory PATH is being created."""
    label = " New Directory"
    Log().write(LOG_VERBOSE, label, path)

  def add_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being 'added'."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Adding", svn_path)

  def change_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being 'changed'."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Changing", svn_path)

  def delete_path(self, path):
    """Report that PATH is being 'deleted'."""
    label = " Deleting"
    Log().write(LOG_VERBOSE, label, path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Report, in two lines, that revision SRC_REVNUM of SRC_PATH is
    being 'copied' to DEST_PATH."""
    Log().write(LOG_VERBOSE,
                " Copying revision", src_revnum, "of", src_path)
    Log().write(LOG_VERBOSE,
                " to", dest_path)

  def finish(self):
    """Report that the repository has been created."""
    closing_note = "Finished creating Subversion repository."
    Log().write(LOG_VERBOSE, closing_note)
    Log().write(LOG_QUIET, "Done.")
# This should be a local to pass1,
# but Python 2.0 does not support nested scopes.
OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
def pass1():
  """Pass 1: walk Ctx().cvsroot and parse every ',v' file found.

  Each file is parsed with cvs2svn_rcsparse, using a CollectData
  instance as the sink.  Files that cannot be parsed, and files
  present both in a directory and in its 'Attic' subdirectory, are
  recorded as fatal errors.  Once the walk is complete, exit with an
  error summary if any fatal error was recorded, or if no ',v' file
  was found at all."""
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    # os.path.walk() callback: BATON is the CollectData instance.
    cd = baton
    attic_len = len(OS_SEP_PLUS_ATTIC)
    for fname in files:
      if fname[-2:] != ',v':
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname[-attic_len:] == OS_SEP_PLUS_ATTIC:
        # drop the 'Attic' portion from the pathname for the canonical name.
        cd.set_fname(os.path.join(dirname[:-attic_len], fname), pathname)
      else:
        # If this file also exists in the attic, it's a fatal error
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        cd.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)
      # One file is opened per ',v' file in the repository, so close
      # each one explicitly instead of leaving that to the garbage
      # collector.  (try/finally is nested around try/except because
      # the combined form is not available in old Pythons.)
      rcs_file = None
      try:
        try:
          rcs_file = open(pathname, 'rb')
          cvs2svn_rcsparse.parse(rcs_file, cd)
        except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
                RuntimeError):
          err = "%s: '%s' is not a valid ,v file" \
                % (error_prefix, pathname)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        except:
          # Anything else is unexpected: say which file we were
          # working on, then let the exception propagate.
          Log().write(LOG_WARN, "Exception occurred while parsing %s" % pathname)
          raise
      finally:
        if rcs_file is not None:
          rcs_file.close()

  os.path.walk(Ctx().cvsroot, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if len(cd.fatal_errors) > 0:
    sys.exit("Pass 1 complete.\n" + "=" * 75 + "\n"
             + "Error summary:\n"
             + "\n".join(cd.fatal_errors)
             + "\nExited due to fatal error(s).")

  if cd.found_valid_file is None:
    sys.exit("\nNo RCS files found in your CVS Repository!\n"
             + "Are you absolutely certain you are pointing cvs2svn\n"
             + "at a CVS repository?\n"
             + "\nExited due to fatal error(s).")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass2():
  """Pass 2: clean up the revision information.

  First check the symbol exclusion / forcing options against the
  symbol database, and exit with status 1 if any of them cannot be
  honoured.  Then create the tags database, and copy the revisions
  file to the clean-revs file, dropping revisions on excluded
  branches, removing excluded symbols, converting forced tags and
  branches, and re-synchronizing timestamps according to the resync
  file."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Expand the exclusion regexps into the actual excluded symbols.
  # (The result is used as a mapping: it is queried with has_key().)
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write(" '%s'\n" % (blocker))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write(" '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = filter(is_not_forced, mismatches)
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write(" '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags.keys():
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp.  We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory.  really large repositories with whacky timestamps could
    ### bust this assumption.  should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit.  We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped.  We also make them a list because
    # we will dynamically expand the lower/upper bound as we find commits
    # that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      if resync.has_key(digest):
        resync[digest].append([t1_l, t1_u, t2])
      else:
        resync[digest] = [ [t1_l, t1_u, t2] ]

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    digests = resync.keys()
    for digest in digests:
      (resync[digest]).sort()

    return resync

  resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))

  output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)

  # Filter used to remove all references to excluded tags and
  # branches.  It does not depend on the revision being processed, so
  # it is defined once, outside the loop.  (EXCLUDES is passed in as a
  # default argument because Python 2.0 does not support nested
  # scopes.)
  def not_excluded(symbol, excludes=excludes):
    return not excludes.has_key(symbol)

  # process the revisions file, looking for items to clean up
  for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
    c_rev = CVSRevision(Ctx(), line[:-1])

    # Skip this entire revision if it's on an excluded branch
    if excludes.has_key(c_rev.branch_name):
      continue

    # Remove all references to excluded tags and branches
    c_rev.branches = filter(not_excluded, c_rev.branches)
    c_rev.tags = filter(not_excluded, c_rev.tags)

    # Convert all branches that are forced to be tags
    for forced_tag in Ctx().forced_tags:
      if forced_tag in c_rev.branches:
        c_rev.branches.remove(forced_tag)
        c_rev.tags.append(forced_tag)

    # Convert all tags that are forced to be branches
    for forced_branch in Ctx().forced_branches:
      if forced_branch in c_rev.tags:
        c_rev.tags.remove(forced_branch)
        c_rev.branches.append(forced_branch)

    # see if this is "near" any of the resync records we
    # have recorded for this digest [of the log message].
    for record in resync.get(c_rev.digest, []):
      if record[0] <= c_rev.timestamp <= record[1]:
        # bingo!  remap the time on this (record[2] is the new time).

        # adjust the time range.  we want the COMMIT_THRESHOLD from the
        # bounds of the earlier/latest commit in this group.
        record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
        record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

        # By default this will be the new timestamp
        new_timestamp = record[2]
        # If the new timestamp is earlier than that of our previous revision
        if record[2] < c_rev.prev_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is before previous the time of"
                  + " revision %s (%s):")
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, record[2],
                                        c_rev.prev_rev, c_rev.prev_timestamp))
          # If resyncing our rev to c_rev.prev_timestamp + 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted sync time, then sync back to c_rev.prev_timestamp
          # + 1...
          if (c_rev.prev_timestamp - record[2]) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.prev_timestamp + 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          # ...otherwise, make no change
          else:
            new_timestamp = c_rev.timestamp
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)

        # Report the adjustment that is actually applied, which is not
        # necessarily the one that was asked for (see just above).
        msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
                 new_timestamp - c_rev.timestamp)
        Log().write(LOG_VERBOSE, msg)

        c_rev.timestamp = new_timestamp

        # stop looking for hits
        break

    output.write(str(c_rev) + "\n")

  # The next pass reads this file back, so make sure that everything
  # has reached the disk rather than relying on the file object being
  # finalized when this function returns.
  output.close()
  Log().write(LOG_QUIET, "Done")
def pass3():
  """Sort the cleaned CVS revisions file into the sorted revisions file.

  The sorted output is registered with Cleanup() against pass5."""
  clean_revs_name = DATAFILE + CLEAN_REVS_SUFFIX
  sorted_revs_name = DATAFILE + SORTED_REVS_SUFFIX

  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  sort_file(temp(clean_revs_name), temp(sorted_revs_name))
  Cleanup().register(temp(sorted_revs_name), pass5)
  Log().write(LOG_QUIET, "Done")
def pass4():
  """Copy the sorted revisions from the flat file into a database.

  Unless this is a trunk-only conversion, also build the
  LastSymbolicNameDatabase, which contains the last CVSRevision
  that is a source for each tag or branch.
  """
  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)

  if Ctx().trunk_only:
    # Stand-in with do-nothing methods, so the loop below does not have
    # to test Ctx().trunk_only for every revision.
    class NullLastSymbolicNameDatabase:
      def log_revision(*args):
        pass

      def create_database(*args):
        pass

    last_sym_name_db = NullLastSymbolicNameDatabase()
  else:
    Log().write(LOG_QUIET,
                "and finding last CVS revisions for all symbolic names...")
    last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for rev_line in sorted_revs:
    # Each line holds one revision; drop the trailing newline.
    c_rev = CVSRevision(Ctx(), rev_line[:-1])
    cvs_revs_db.log_revision(c_rev)
    last_sym_name_db.log_revision(c_rev)
    StatsKeeper().record_c_rev(c_rev)

  last_sym_name_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  Every sorted revision is fed to a CVSRevisionAggregator, except that
  in a trunk-only conversion revisions with a branch name are skipped.
  CVSCommit._commit also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.
  """
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for rev_line in sorted_revs:
    # Each line holds one revision; drop the trailing newline.
    c_rev = CVSRevision(Ctx(), rev_line[:-1])
    if Ctx().trunk_only and c_rev.branch_name is not None:
      # Branch revision in a trunk-only conversion: ignore it.
      continue
    aggregator.process_revision(c_rev)
  aggregator.flush()

  # SVNCommit.revnum - 1 is recorded as the Subversion revision count.
  StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass6():
  """Sort the symbol openings/closings file, unless converting trunk only.

  The sorted file is registered with Cleanup() against pass8."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  trunk_only = Ctx().trunk_only
  if not trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), pass8)

  Log().write(LOG_QUIET, "Done")
def pass7():
  """Record, for every symbolic name, where its entries start in the
  sorted openings/closings file.  Does nothing but log when the
  conversion is trunk-only."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      # Use readline() rather than iterating over the file object:
      # tell() must report the offset of the line about to be read, and
      # file iteration reads ahead.
      while 1:
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only the symbol name is used, but unpacking all three fields
        # still rejects a malformed line with a ValueError.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if not sym == old_sym:
          # First line for this symbol: remember where it starts.
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      # Always release the file handle, even if a line fails to parse.
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Replay the stored SVNCommits, in order, into an SVNRepositoryMirror.

  The mirror feeds a RepositoryDelegate when Ctx().target is set and a
  DumpfileDelegate otherwise (neither on a dry run), plus a
  StdoutDelegate in every case."""
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  if Ctx().target:
    delegate_class = RepositoryDelegate
    banner = "Starting Subversion Repository."
  else:
    delegate_class = DumpfileDelegate
    banner = "Starting Subversion Dumpfile."

  if not Ctx().dry_run:
    repos.add_delegate(delegate_class())
  Log().write(LOG_QUIET, banner)

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  # Repository initialization is 1, so stored commits start at 2.
  svncounter = 2
  svn_commit = persistence_manager.get_svn_commit(svncounter)
  while svn_commit:
    repos.commit(svn_commit)
    svncounter += 1
    svn_commit = persistence_manager.get_svn_commit(svncounter)

  repos.finish()
# The conversion passes, in execution order.  convert() runs pass number
# N (1-based) as _passes[N - 1], and usage()/main() use len(_passes) as
# the highest valid pass number.
_passes = [
  pass1,
  pass2,
  pass3,
  pass4,
  pass5,
  pass6,
  pass7,
  pass8,
  ]
class Ctx:
  """Session state for this run of cvs2svn.  For example, run-time
  options are stored here.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.

  Every instance shares one attribute dictionary, so a value set through
  any instance is visible through all of them.
  """

  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if not self.__dict__:
      # Very first instantiation: the shared state is still empty, so
      # fill in the defaults.  Later instantiations leave it untouched.

      # Input and output locations.
      self.cvsroot = None
      self.target = None
      self.dumpfile = DUMPFILE
      self.tmpdir = '.'
      self.existing_svnrepos = 0
      self.dump_only = 0
      self.dry_run = 0
      self.skip_cleanup = 0

      # Verbosity and help.
      self.verbose = 0
      self.quiet = 0
      self.print_help = 0

      # Repository layout.
      self.prune = 1
      self.trunk_only = 0
      self.trunk_base = "trunk"
      self.tags_base = "tags"
      self.branches_base = "branches"

      # Log messages, file properties and commit metadata.
      self.encoding = "ascii"
      self.mime_types_file = None
      self.mime_mapper = None
      self.no_default_eol = 0
      self.eol_from_mime_type = 0
      self.keywords_off = 0
      self.username = None
      self.cvs_revnums = 0

      # External tools and repository creation options.
      self.use_cvs = None
      self.svnadmin = "svnadmin"
      self.bdb_txn_nosync = 0
      self.fs_type = None

      # Symbol (tag and branch) handling.
      self.forced_branches = []
      self.forced_tags = []
      self.excludes = []
      self.symbol_transforms = []
class MimeMapper:
  """A class that provides mappings from file names to MIME types.
  Note that we should really be using Python's 'mimetypes' module.
  See http://cvs2svn.tigris.org/servlets/ReadMsg?list=dev&msgNo=266
  for more."""

  def __init__(self):
    # Maps a file extension (without the dot), or a whole base name for
    # files without an extension, to a MIME type string.
    self.mappings = { }

  def set_mime_types_file(self, mime_types_file):
    """Load the mappings found in the file named MIME_TYPES_FILE.

    Each useful line looks something like

       text/plain c h cpp

    that is, a MIME type followed by one or more extensions.  Comment
    lines (first non-blank character '#'), blank lines and lines that
    name a type without any extension are ignored.  If an extension is
    listed under two different types, a warning is written to stderr
    and the later type wins."""
    # Use a private FileInput instance rather than the module-global
    # fileinput.input(), which raises RuntimeError when another
    # fileinput.input() is still active.
    mime_file = fileinput.FileInput(mime_types_file)
    try:
      for line in mime_file:
        fields = line.split()
        # Checking the first field (rather than the raw line) also
        # skips comment lines that are indented.
        if len(fields) < 2 or fields[0].startswith("#"):
          continue
        mime_type = fields[0]
        for ext in fields[1:]:
          known_type = self.mappings.get(ext)
          if known_type is not None and known_type != mime_type:
            sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                             % (warning_prefix, ext, known_type, mime_type))
          self.mappings[ext] = mime_type
    finally:
      mime_file.close()

  def get_type_from_filename(self, filename):
    """Return the MIME type mapped to FILENAME, or None if there is none.

    The lookup key is the extension of FILENAME's last path component,
    without the dot."""
    basename, extension = os.path.splitext(os.path.basename(filename))

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename
    return self.mappings.get(extension)
def convert(start_pass, end_pass):
  """Convert a CVS repository to an SVN repository.

  Runs passes START_PASS through END_PASS (1-based, inclusive) from
  _passes, recording the duration of each in StatsKeeper(), and then
  prints the statistics and timings."""
  cleanup = Cleanup()

  # timestamps[n] is the time at which pass n finished; the slot before
  # the first pass to run holds the overall start time.
  timestamps = [ None ] * (end_pass + 1)
  timestamps[start_pass - 1] = time.time()
  StatsKeeper().set_start_time(time.time())

  for pass_number in range(start_pass, end_pass + 1):
    run_pass = _passes[pass_number - 1]
    Log().write(LOG_QUIET, '----- pass %d -----' % pass_number)
    run_pass()
    timestamps[pass_number] = time.time()
    elapsed = timestamps[pass_number] - timestamps[pass_number - 1]
    StatsKeeper().log_duration_for_pass(elapsed, pass_number)

    # Dispose of items in Ctx() not intended to live past the end of the
    # pass (identified by exactly one leading underscore).
    for attr in dir(Ctx()):
      if len(attr) <= 2 or attr[0] != '_' or attr[1] == '_':
        continue
      if attr[:6] == "_Ctx__":
        # One of Ctx's own name-mangled attributes; leave it alone.
        continue
      delattr(Ctx(), attr)

    if not Ctx().skip_cleanup:
      cleanup.cleanup(run_pass)

  StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET, '(These are unaltered CVS repository stats and do not\n'
                + ' reflect tags or branches excluded via --exclude)\n')
  print(StatsKeeper().timings())
def usage():
  """Print the command-line help text to standard output.

  The defaults shown are read from Ctx() and the number of passes from
  _passes."""
  ctx = Ctx()
  help_lines = (
    ('USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path'
     % os.path.basename(sys.argv[0])),
    ' --help, -h print this usage message and exit with success',
    ' --version print the version number',
    ' -q quiet',
    ' -v verbose',
    ' -s PATH path for SVN repos',
    (' -p START[:END] start at pass START, end at pass END of %d'
     % len(_passes)),
    ' If only START is given, run only pass START',
    ' (implicitly enables --skip-cleanup)',
    ' --existing-svnrepos load into existing SVN repository',
    ' --dumpfile=PATH name of intermediate svn dumpfile',
    ' --tmpdir=PATH directory to use for tmp data (default to cwd)',
    ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)',
    ' --dry-run do not create a repository or a dumpfile;',
    ' just print what would happen.',
    ' --use-cvs use CVS instead of RCS \'co\' to extract data',
    ' (only use this if having problems with RCS)',
    ' --svnadmin=PATH path to the svnadmin program',
    ' --trunk-only convert only trunk commits, not tags nor branches',
    (' --trunk=PATH path for trunk (default: %s)'
     % ctx.trunk_base),
    (' --branches=PATH path for branches (default: %s)'
     % ctx.branches_base),
    (' --tags=PATH path for tags (default: %s)'
     % ctx.tags_base),
    ' --no-prune don\'t prune empty directories',
    ' --dump-only just produce a dumpfile, don\'t commit to a repos',
    (' --encoding=ENC encoding of log messages in CVS repos (default: %s)'
     % ctx.encoding),
    ' --force-branch=NAME force NAME to be a branch',
    ' --force-tag=NAME force NAME to be a tag',
    ' --exclude=REGEXP exclude branches and tags matching REGEXP',
    ' --symbol-transform=P:S transform symbol names from P to S where P and S',
    ' use Python regexp and reference syntax respectively',
    ' --username=NAME username for cvs2svn-synthesized commits',
    ' --skip-cleanup prevent the deletion of intermediate files',
    ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"',
    ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"',
    ' --cvs-revnums record CVS revision numbers as file properties',
    (' --mime-types=FILE specify an apache-style mime.types file for\n'
     ' setting svn:mime-type'),
    ' --eol-from-mime-type set svn:eol-style by mime type (only with --mime-types)',
    ' --no-default-eol don\'t set svn:eol-style by CVS defaults',
    ' --keywords-off don\'t set svn:keywords on any files (cvs2svn sets',
    ' "svn:keywords to author date id" on non-binary files',
    ' by default)',
    )
  for help_line in help_lines:
    print(help_line)
4413 def main():
4414 # Convenience var, so we don't have to keep instantiating this Borg.
4415 ctx = Ctx()
4417 profiling = None
4418 start_pass = 1
4419 end_pass = len(_passes)
4421 try:
4422 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
4423 [ "help", "create", "trunk=",
4424 "username=", "existing-svnrepos",
4425 "branches=", "tags=", "encoding=",
4426 "force-branch=", "force-tag=", "exclude=",
4427 "use-cvs", "mime-types=",
4428 "eol-from-mime-type", "no-default-eol",
4429 "trunk-only", "no-prune", "dry-run",
4430 "dump-only", "dumpfile=", "tmpdir=",
4431 "svnadmin=", "skip-cleanup", "cvs-revnums",
4432 "bdb-txn-nosync", "fs-type=",
4433 "version", "profile",
4434 "keywords-off", "symbol-transform="])
4435 except getopt.GetoptError, e:
4436 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
4437 usage()
4438 sys.exit(1)
4440 for opt, value in opts:
4441 if opt == '--version':
4442 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
4443 sys.exit(0)
4444 elif opt == '-p':
4445 # Don't cleanup if we're doing incrementals.
4446 ctx.skip_cleanup = 1
4447 if value.find(':') > 0:
4448 start_pass, end_pass = map(int, value.split(':'))
4449 else:
4450 end_pass = start_pass = int(value)
4451 if start_pass > len(_passes) or start_pass < 1:
4452 print '%s: illegal value (%d) for starting pass. '\
4453 'must be 1 through %d.' % (error_prefix, int(start_pass),
4454 len(_passes))
4455 sys.exit(1)
4456 if end_pass < start_pass or end_pass > len(_passes):
4457 print '%s: illegal value (%d) for ending pass. ' \
4458 'must be %d through %d.' % (error_prefix, int(end_pass),
4459 int(start_pass), len(_passes))
4460 sys.exit(1)
4461 elif (opt == '--help') or (opt == '-h'):
4462 ctx.print_help = 1
4463 elif opt == '-v':
4464 Log().log_level = LOG_VERBOSE
4465 ctx.verbose = 1
4466 elif opt == '-q':
4467 Log().log_level = LOG_QUIET
4468 ctx.quiet = 1
4469 elif opt == '-s':
4470 ctx.target = value
4471 elif opt == '--existing-svnrepos':
4472 ctx.existing_svnrepos = 1
4473 elif opt == '--dumpfile':
4474 ctx.dumpfile = value
4475 elif opt == '--tmpdir':
4476 ctx.tmpdir = value
4477 elif opt == '--use-cvs':
4478 ctx.use_cvs = 1
4479 elif opt == '--svnadmin':
4480 ctx.svnadmin = value
4481 elif opt == '--trunk-only':
4482 ctx.trunk_only = 1
4483 elif opt == '--trunk':
4484 if not value:
4485 sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
4486 ctx.trunk_base = value
4487 elif opt == '--branches':
4488 if not value:
4489 sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
4490 ctx.branches_base = value
4491 elif opt == '--tags':
4492 if not value:
4493 sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
4494 ctx.tags_base = value
4495 elif opt == '--no-prune':
4496 ctx.prune = None
4497 elif opt == '--dump-only':
4498 ctx.dump_only = 1
4499 elif opt == '--dry-run':
4500 ctx.dry_run = 1
4501 elif opt == '--encoding':
4502 ctx.encoding = value
4503 elif opt == '--force-branch':
4504 ctx.forced_branches.append(value)
4505 elif opt == '--force-tag':
4506 ctx.forced_tags.append(value)
4507 elif opt == '--exclude':
4508 try:
4509 ctx.excludes.append(re.compile('^' + value + '$'))
4510 except re.error, e:
4511 sys.exit(error_prefix + ": '%s' is not a valid regexp.\n" % (value))
4512 elif opt == '--mime-types':
4513 ctx.mime_types_file = value
4514 elif opt == '--eol-from-mime-type':
4515 ctx.eol_from_mime_type = 1
4516 elif opt == '--no-default-eol':
4517 ctx.no_default_eol = 1
4518 elif opt == '--keywords-off':
4519 ctx.keywords_off = 1
4520 elif opt == '--username':
4521 ctx.username = value
4522 elif opt == '--skip-cleanup':
4523 ctx.skip_cleanup = 1
4524 elif opt == '--cvs-revnums':
4525 ctx.cvs_revnums = 1
4526 elif opt == '--bdb-txn-nosync':
4527 ctx.bdb_txn_nosync = 1
4528 elif opt == '--fs-type':
4529 ctx.fs_type = value
4530 elif opt == '--create':
4531 sys.stderr.write(warning_prefix +
4532 ': The behaviour produced by the --create option is now the '
4533 'default,\nand passing the option is deprecated.\n')
4534 elif opt == '--profile':
4535 profiling = 1
4536 elif opt == '--symbol-transform':
4537 ctx.symbol_transforms.append(value.split(":"))
4539 if ctx.print_help:
4540 usage()
4541 sys.exit(0)
4543 # Consistency check for options and arguments.
4544 if len(args) == 0:
4545 usage()
4546 sys.exit(1)
4548 if len(args) > 1:
4549 sys.stderr.write(error_prefix +
4550 ": must pass only one CVS repository.\n")
4551 usage()
4552 sys.exit(1)
4554 ctx.cvsroot = args[0]
4556 if not os.path.isdir(ctx.cvsroot):
4557 sys.stderr.write(error_prefix +
4558 ": the given CVS repository path '%s' is not an "
4559 "existing directory.\n" % ctx.cvsroot)
4560 sys.exit(1)
4562 if ctx.use_cvs:
4563 # Ascend above the specified root if necessary, to find the cvs_repository
4564 # (a directory containing a CVSROOT directory) and the cvs_module (the
4565 # path of the conversion root within the cvs repository)
4566 # NB: cvs_module must be seperated by '/' *not* by os.sep .
4567 ctx.cvs_repository = os.path.abspath(ctx.cvsroot)
4568 prev_cvs_repository = None
4569 ctx.cvs_module = ""
4570 while prev_cvs_repository != ctx.cvs_repository:
4571 if os.path.isdir(os.path.join(ctx.cvs_repository, 'CVSROOT')):
4572 break
4573 prev_cvs_repository = ctx.cvs_repository
4574 ctx.cvs_repository, module_component = os.path.split(ctx.cvs_repository)
4575 ctx.cvs_module = module_component + "/" + ctx.cvs_module
4576 else:
4577 # Hit the root (of the drive, on Windows) without finding a CVSROOT dir.
4578 sys.stderr.write(error_prefix +
4579 ": the path '%s' is not a CVS repository, nor a path " \
4580 "within a CVS repository. A CVS repository contains " \
4581 "a CVSROOT directory within its root directory.\n" \
4582 % ctx.cvsroot)
4583 sys.exit(1)
4584 os.environ['CVSROOT'] = ctx.cvs_repository
4586 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
4587 sys.stderr.write(error_prefix +
4588 ": must pass one of '-s' or '--dump-only'.\n")
4589 sys.exit(1)
4591 def not_both(opt1val, opt1name, opt2val, opt2name):
4592 if opt1val and opt2val:
4593 sys.stderr.write(error_prefix + ": cannot pass both '%s' and '%s'.\n" \
4594 % (opt1name, opt2name))
4595 sys.exit(1)
4597 not_both(ctx.target, '-s',
4598 ctx.dump_only, '--dump-only')
4600 not_both(ctx.dump_only, '--dump-only',
4601 ctx.existing_svnrepos, '--existing-svnrepos')
4603 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
4604 ctx.existing_svnrepos, '--existing-svnrepos')
4606 not_both(ctx.dump_only, '--dump-only',
4607 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
4609 not_both(ctx.quiet, '-q',
4610 ctx.verbose, '-v')
4612 not_both(ctx.fs_type, '--fs-type',
4613 ctx.existing_svnrepos, '--existing-svnrepos')
4615 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
4616 sys.stderr.write(error_prefix +
4617 ": cannot pass --bdb-txn-nosync with --fs-type=%s.\n" \
4618 % ctx.fs_type)
4619 sys.exit(1)
4621 if ((string.find(ctx.trunk_base, '/') > -1)
4622 or (string.find(ctx.tags_base, '/') > -1)
4623 or (string.find(ctx.branches_base, '/') > -1)):
4624 sys.stderr.write("%s: cannot pass multicomponent path to "
4625 "--trunk, --tags, or --branches yet.\n"
4626 " See http://cvs2svn.tigris.org/issues/show_bug.cgi?"
4627 "id=7 for details.\n" % error_prefix)
4628 sys.exit(1)
4630 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
4631 sys.stderr.write(error_prefix +
4632 ": the svn-repos-path '%s' is not an "
4633 "existing directory.\n" % ctx.target)
4634 sys.exit(1)
4636 if not ctx.dump_only and not ctx.existing_svnrepos \
4637 and (not ctx.dry_run) and os.path.exists(ctx.target):
4638 sys.stderr.write(error_prefix +
4639 ": the svn-repos-path '%s' exists.\nRemove it, or pass "
4640 "'--existing-svnrepos'.\n" % ctx.target)
4641 sys.exit(1)
4643 if ctx.mime_types_file:
4644 ctx.mime_mapper = MimeMapper()
4645 ctx.mime_mapper.set_mime_types_file(ctx.mime_types_file)
4647 # Make sure the tmp directory exists. Note that we don't check if
4648 # it's empty -- we want to be able to use, for example, "." to hold
4649 # tempfiles. But if we *did* want check if it were empty, we'd do
4650 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
4651 if not os.path.exists(ctx.tmpdir):
4652 os.mkdir(ctx.tmpdir)
4653 elif not os.path.isdir(ctx.tmpdir):
4654 sys.stderr.write(error_prefix +
4655 ": cvs2svn tried to use '%s' for temporary files, but that path\n"
4656 " exists and is not a directory. Please make it be a directory,\n"
4657 " or specify some other directory for temporary files.\n" \
4658 % ctx.tmpdir)
4659 sys.exit(1)
4661 if ctx.use_cvs:
4662 def cvs_ok():
4663 pipe = Popen3('cvs %s --version' % Ctx().cvs_global_arguments, True)
4664 pipe.tochild.close()
4665 pipe.fromchild.read()
4666 errmsg = pipe.childerr.read()
4667 status = pipe.wait()
4668 ok = len(errmsg) == 0 and status == 0
4669 return (ok, status, errmsg)
4671 ctx.cvs_global_arguments = "-q -R"
4672 ok, cvs_exitstatus, cvs_errmsg = cvs_ok()
4673 if not ok:
4674 ctx.cvs_global_arguments = "-q"
4675 ok, cvs_exitstatus, cvs_errmsg = cvs_ok()
4677 if not ok:
4678 sys.stderr.write(error_prefix +
4679 ": error executing CVS: status %s, error output:\n" \
4680 % (cvs_exitstatus) + cvs_errmsg)
4682 # But do lock the tmpdir, to avoid process clash.
4683 try:
4684 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
4685 except OSError, e:
4686 if e.errno == errno.EACCES:
4687 sys.stderr.write(error_prefix + ": Permission denied:"
4688 + " No write access to output directory.\n")
4689 sys.exit(1)
4690 if e.errno == errno.EEXIST:
4691 sys.stderr.write(error_prefix +
4692 ": cvs2svn is using directory '%s' for temporary files, but\n"
4693 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
4694 " cvs2svn process is currently using '%s' as its temporary\n"
4695 " workspace. If you are certain that is not the case,\n"
4696 " then remove the '%s/cvs2svn.lock' subdirectory.\n" \
4697 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir))
4698 sys.exit(1)
4699 raise
4700 try:
4701 if profiling:
4702 import hotshot
4703 prof = hotshot.Profile('cvs2svn.hotshot')
4704 prof.runcall(convert, start_pass, end_pass)
4705 prof.close()
4706 else:
4707 convert(start_pass, end_pass)
4708 finally:
4709 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
4710 except: pass
# Run the conversion only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
  main()