Fix handling of "branch ;" in RCS file.
[cvs2svn.git] / cvs2svn
blobd3b195922f75322ce9743349b1630689eed27424
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import string
30 import getopt
31 import stat
32 import string
33 import md5
34 import marshal
35 import errno
36 import popen2
38 # Warnings and errors start with these strings. They are typically
39 # followed by a colon and a space, as in "%s: " ==> "WARNING: ".
40 warning_prefix = "WARNING"
41 error_prefix = "ERROR"
# Make sure this Python is recent enough.
if sys.hexversion < 0x2000000:
    # Bail out with a readable message instead of failing later on a
    # language or library feature the interpreter lacks.  (The message
    # used to start with a stray, unmatched apostrophe.)
    sys.stderr.write("%s: Python 2.0 or higher required, "
                     "see www.python.org.\n" % error_prefix)
    sys.exit(1)
49 # Pretend we have true booleans on older python versions
50 try:
51 True
52 except:
53 True = 1
54 False = 0
# Minimal, incomplete, version of popen2.Popen3 for those platforms
# for which popen2 does not provide it.
try:
    Popen3 = popen2.Popen3
except AttributeError:
    class Popen3:
        """Stand-in offering the fromchild/tochild/childerr pipes and wait().

        CAPTURESTDERR is accepted so the call signature matches, but this
        stand-in never consults it: the pipes always come from
        popen2.popen3()."""

        def __init__(self, cmd, capturestderr):
            # Anything that is not exactly a str is treated as a sequence
            # of words and flattened into one command line.
            if type(cmd) != str:
                cmd = " ".join(cmd)
            pipes = popen2.popen3(cmd, mode='b')
            self.fromchild, self.tochild, self.childerr = pipes

        def wait(self):
            """Close the pipes in turn and return the first true close()
            result; if none is true, return the result of the last close().

            As soon as one close() returns a true value the remaining
            pipes are left untouched."""
            status = self.fromchild.close()
            if not status:
                status = self.tochild.close()
            if not status:
                status = self.childerr.close()
            return status
# DBM module selection

# 1. If we have bsddb3, it is probably newer than bsddb.  Fake bsddb = bsddb3,
#    so that the dbhash module used by anydbm will use bsddb3.
try:
    import bsddb3
    sys.modules['bsddb'] = sys.modules['bsddb3']
except ImportError:
    pass

# 2. These DBM modules are not good for cvs2svn.
import anydbm
if anydbm._defaultmod.__name__ in ('dumbdbm', 'dbm'):
    # Written to stdout, one message line per output line.
    sys.stdout.write(
        'ERROR: your installation of Python does not contain a suitable' '\n'
        ' DBM module. This script cannot continue.' '\n'
        ' to solve: see http://python.org/doc/current/lib/module-anydbm.html'
        '\n'
        ' for details.' '\n')
    sys.exit(1)

# 3. If we are using the old bsddb185 module, then prefer gdbm instead.
#    Unfortunately, gdbm appears not to be trouble free, either.
if (hasattr(anydbm._defaultmod, 'bsddb')
        and not hasattr(anydbm._defaultmod.bsddb, '__version__')):
    try:
        gdbm = __import__('gdbm')
    except ImportError:
        # No gdbm to fall back on: keep the default module, but warn.
        sys.stderr.write(
            warning_prefix
            + ': The version of the bsddb module found '
            'on your computer has been reported to malfunction on some '
            'datasets, '
            'causing KeyError exceptions. You may wish to upgrade your '
            'Python to '
            'version 2.3 or later.\n')
    else:
        anydbm._defaultmod = gdbm
106 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
107 branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
108 vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$')
110 # This really only matches standard '1.1.1.*'-style vendor revisions.
111 # One could conceivably have a file whose default branch is 1.1.3 or
112 # whatever, or was that at some point in time, with vendor revisions
113 # 1.1.3.1, 1.1.3.2, etc. But with the default branch gone now (which
114 # is the only time this regexp gets used), we'd have no basis for
115 # assuming that the non-standard vendor branch had ever been the
116 # default branch anyway, so we don't want this to match them anyway.
117 vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9])+$')
119 # If this run's output is a repository, then (in the tmpdir) we use
120 # a dumpfile of this name for repository loads.
122 # If this run's output is a dumpfile, then this is default name of
123 # that dumpfile, but in the current directory (unless the user has
124 # specified a dumpfile path, of course, in which case it will be
125 # wherever the user said).
126 DUMPFILE = 'cvs2svn-dump'
128 # This file appears with different suffixes at different stages of
129 # processing. CVS revisions are cleaned and sorted here, for commit
130 # grouping. See design-notes.txt for details.
131 DATAFILE = 'cvs2svn-data'
# This file contains a marshalled copy of all the statistics that we
# gather throughout the various runs of cvs2svn.  The data is stored as
# a marshalled dictionary.
136 STATISTICS_FILE = 'cvs2svn-statistics'
138 # This text file contains records (1 per line) that describe svn
139 # filesystem paths that are the opening and closing source revisions
140 # for copies to tags and branches. The format is as follows:
142 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
144 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
145 # SVN_REVNUM are the primary and secondary sorting criteria for
146 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
147 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
148 # A sorted version of the above file.
149 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
151 # This file is a temporary file for storing symbolic_name -> closing
152 # CVSRevision until the end of our pass where we can look up the
153 # corresponding SVNRevNum for the closing revs and write these out to
154 # the SYMBOL_OPENINGS_CLOSINGS.
155 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
157 # Skeleton version of an svn filesystem.
158 # (These supersede and will eventually replace the two above.)
159 # See class SVNRepositoryMirror for how these work.
160 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
161 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
163 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
164 # SYMBOL_OPENINGS_CLOSINGS_SORTED
165 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
167 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
168 # the CVSRevision is the last such that is a source for those symbolic
169 # names. For example, if branch B's number is 1.3.0.2 in this CVS
170 # file, and this file's 1.3 is the latest (by date) revision among
171 # *all* CVS files that is a source for branch B, then the
172 # CVSRevision.unique_key() corresponding to this file at 1.3 would
173 # list at least B in its list.
174 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
176 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
177 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
178 ### the s-revs data in this database.
179 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
181 # Lists all symbolic names that are tags. Keys are strings (symbolic
182 # names), values are ignorable.
183 TAGS_DB = 'cvs2svn-tags.db'
# A list of all tags.  Each line consists of the tag name and the number
# of files in which it exists, separated by a space.
187 TAGS_LIST = 'cvs2svn-tags.txt'
189 # A list of all branches. The file is stored as a plain text file
190 # to make it easy to look at in an editor. Each line contains the
191 # branch name, the number of files where the branch is created, the
192 # commit count, and a list of tags and branches that are defined on
193 # revisions in the branch.
194 BRANCHES_LIST = 'cvs2svn-branches.txt'
196 # These two databases provide a bidirectional mapping between
197 # CVSRevision.unique_key()s and Subversion revision numbers.
199 # The first maps CVSRevision.unique_key() to a number; the values are
200 # not unique.
202 # The second maps a number to a list of CVSRevision.unique_key()s.
203 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
204 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
206 # This database maps svn_revnums to tuples of (symbolic_name, date).
208 # The svn_revnums are the revision numbers of all non-primary
209 # SVNCommits. No primary SVNCommit has a key in this database.
211 # The date is stored for all commits in this database.
213 # For commits that fill symbolic names, the symbolic_name is stored.
# For commits that are default branch syncs, the symbolic_name is None.
215 SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'
217 # This database maps svn_revnums of a default branch synchronization
218 # commit to the svn_revnum of the primary SVNCommit that motivated it.
220 # (NOTE: Secondary commits that fill branches and tags also have a
221 # motivating commit, but we do not record it because it is (currently)
222 # not needed for anything.)
224 # This mapping is used when generating the log message for the commit
225 # that synchronizes the default branch with trunk.
226 MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'
228 # How many bytes to read at a time from a pipe. 128 kiB should be
229 # large enough to be efficient without wasting too much memory.
230 PIPE_READ_SIZE = 128 * 1024
232 # Record the default RCS branches, if any, for CVS filepaths.
234 # The keys are CVS filepaths, relative to the top of the repository
235 # and with the ",v" stripped off, so they match the cvs paths used in
236 # Commit.commit(). The values are vendor branch revisions, such as
237 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
238 # represents the highest vendor branch revision thought to have ever
239 # been head of the default branch.
241 # The reason we record a specific vendor revision, rather than a
242 # default branch number, is that there are two cases to handle:
244 # One case is simple. The RCS file lists a default branch explicitly
245 # in its header, such as '1.1.1'. In this case, we know that every
246 # revision on the vendor branch is to be treated as head of trunk at
247 # that point in time.
249 # But there's also a degenerate case. The RCS file does not currently
250 # have a default branch, yet we can deduce that for some period in the
251 # past it probably *did* have one. For example, the file has vendor
252 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
253 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
254 # case, we should record 1.1.1.96 as the last vendor revision to have
255 # been the head of the default branch.
256 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
258 # Records the author and log message for each changeset.
259 # The keys are author+log digests, the same kind used to identify
260 # unique revisions in the .revs, etc files. Each value is a tuple
261 # of two elements: '(author logmessage)'.
262 METADATA_DB = "cvs2svn-metadata.db"
264 REVS_SUFFIX = '.revs'
265 CLEAN_REVS_SUFFIX = '.c-revs'
266 SORTED_REVS_SUFFIX = '.s-revs'
267 RESYNC_SUFFIX = '.resync'
269 SVN_INVALID_REVNUM = -1
271 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
273 # Things that can happen to a file.
274 OP_NOOP = '-'
275 OP_ADD = 'A'
276 OP_DELETE = 'D'
277 OP_CHANGE = 'C'
279 # A deltatext either does or doesn't represent some change.
280 DELTATEXT_NONEMPTY = 'N'
281 DELTATEXT_EMPTY = 'E'
283 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
285 # Constants used in SYMBOL_OPENINGS_CLOSINGS
286 OPENING = 'O'
287 CLOSING = 'C'
def temp(basename):
    """Return the path of BASENAME inside the directory Ctx().tmpdir.

    This is only shorthand, so callers need not spell out the join."""
    tmpdir = Ctx().tmpdir
    return os.path.join(tmpdir, basename)
# Since the unofficial set also includes [/\] we need to translate those
# into ones that don't conflict with Subversion limitations.
def _clean_symbolic_name(name):
    """Return NAME with every '/' turned into '++' and every backslash
    turned into '--', so the result is usable in a Subversion pathname."""
    for forbidden, substitute in (('/', '++'), ('\\', '--')):
        name = name.replace(forbidden, substitute)
    return name
def _path_join(*components):
    """Return the COMPONENTS glued together with '/' between them.

    Components that are empty (or otherwise false) are left out."""
    kept = [part for part in components if part]
    return '/'.join(kept)
def run_command(command):
    """Run COMMAND with os.system(); exit the program if it fails.

    Any true (non-zero) result from os.system() ends the process through
    sys.exit() with a message quoting the command."""
    status = os.system(command)
    if status:
        sys.exit('Command failed: "%s"' % command)
def relative_name(cvsroot, fname):
    """Return FNAME relative to CVSROOT, with '/' as the path separator.

    FNAME must start with CVSROOT.  One os.sep directly after that prefix
    is dropped, and every remaining os.sep is replaced by '/'.  If FNAME
    is exactly CVSROOT the result is the empty string.

    If FNAME does not start with CVSROOT, an error is written to stderr
    and the program exits with status 1."""
    root_len = len(cvsroot)
    if fname[:root_len] == cvsroot:
        # Slice instead of indexing: when FNAME is exactly CVSROOT there
        # is no character after the prefix, and fname[root_len] would
        # raise IndexError.
        if fname[root_len:root_len + 1] == os.sep:
            root_len = root_len + 1
        return fname[root_len:].replace(os.sep, '/')
    sys.stderr.write("%s: relative_name('%s', '%s'): fname is not a sub-path of"
                     " cvsroot\n" % (error_prefix, cvsroot, fname))
    sys.exit(1)
def get_co_pipe(c_rev):
    """Return (COMMAND, PIPE) for checking out the text of C_REV.

    C_REV is a CVSRevision.  COMMAND is the shell command string: a
    'cvs ... co -p' invocation when Ctx().use_cvs is set, otherwise an
    RCS 'co -p' invocation.  PIPE is the Popen3 object running COMMAND;
    its fromchild yields the text of the revision."""
    ctx = Ctx()
    if ctx.use_cvs:
        target = escape_shell_arg(ctx.cvs_module + c_rev.cvs_path)
        pipe_cmd = 'cvs %s co -r%s -p %s' % (ctx.cvs_global_arguments,
                                             c_rev.rev, target)
    else:
        target = escape_shell_arg(c_rev.rcs_path())
        pipe_cmd = 'co -q -x,v -p%s %s' % (c_rev.rev, target)
    pipe = Popen3(pipe_cmd, True)
    # Nothing is ever sent to the child, so close its input right away.
    pipe.tochild.close()
    return pipe_cmd, pipe
def generate_ignores(c_rev):
    """Return the list of ignore patterns found in the text of C_REV.

    The text is read through get_co_pipe() and split on whitespace; each
    word is one pattern.  A lone '!' discards every pattern seen before
    it.  (Presumably C_REV is a revision of a .cvsignore file -- confirm
    against the callers.)

    If the checkout command reports a failing exit status, the program
    exits with a message holding the command, the status and the
    command's error output."""
    pipe_cmd, pipe = get_co_pipe(c_rev)

    # Drain the child's output in PIPE_READ_SIZE pieces.
    chunks = []
    while True:
        buf = pipe.fromchild.read(PIPE_READ_SIZE)
        if not buf:
            break
        chunks.append(buf)
    raw_ignore_val = "".join(chunks)
    pipe.fromchild.close()

    error_output = pipe.childerr.read()
    exit_status = pipe.wait()
    if exit_status:
        sys.exit("%s: The command '%s' failed with exit status: %s\n"
                 "and the following output:\n"
                 "%s" % (error_prefix, pipe_cmd, exit_status, error_output))

    # Splitting on any whitespace yields exactly the non-empty words, so
    # no separate pass is needed to convert spaces or skip empty lines.
    ignore_vals = []
    for ignore in raw_ignore_val.split():
        if ignore == '!':
            # Reset the list if we encounter a '!'
            # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
            ignore_vals = []
        else:
            ignore_vals.append(ignore)
    return ignore_vals
# Return a string that has not been returned by gen_key() before.
# The counter is a plain integer; Python (2.2 and later) promotes it to
# a long by itself should it ever outgrow a machine word.
gen_key_base = 0
def gen_key():
    """Return the current counter as lowercase hex digits, then advance
    the counter by one so that the next call yields a different key."""
    global gen_key_base
    key = '%x' % gen_key_base
    gen_key_base += 1
    return key
if sys.platform == "win32":
    def escape_shell_arg(str):
        """Return STR wrapped in double quotes, with each embedded double
        quote replaced by the sequence "^"" ."""
        return '"%s"' % str.replace('"', '"^""')
else:
    def escape_shell_arg(str):
        """Return STR wrapped in single quotes, with each embedded single
        quote replaced by the sequence '\\'' ."""
        return "'%s'" % str.replace("'", "'\\''")
def format_date(date):
    """Return DATE (seconds since the epoch) as an svn-style date string,
    expressed in UTC."""
    # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
    utc_fields = time.gmtime(date)
    return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc_fields)
def sort_file(infile, outfile):
    """Sort the lines of INFILE into OUTFILE using the system 'sort'
    command, with Ctx().tmpdir as sort's temporary directory.

    LC_ALL is forced to 'C' while sort runs and afterwards put back to
    its previous value (or removed again if it was not set)."""
    # GNU sort will sort our dates differently (incorrectly!) if our
    # LC_ALL is anything but 'C', so remember the current setting and
    # switch to 'C' for the duration of the sort.
    saved_lc_all = os.environ.get('LC_ALL')
    os.environ['LC_ALL'] = 'C'

    if sys.platform == "win32":
        command = 'sort %s /T %s > %s' % (infile, Ctx().tmpdir, outfile)
    else:
        command = 'sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile)
    run_command(command)

    if saved_lc_all is None:
        del os.environ['LC_ALL']
    else:
        os.environ['LC_ALL'] = saved_lc_all
def print_node_tree(tree, root_node, indent_depth=0):
    """Debugging aid: print every node of TREE reachable from ROOT_NODE.

    One line is written to stdout per node, showing the node key and its
    entry in TREE.  INDENT_DEPTH is the nesting level of ROOT_NODE; at
    level 0 a banner line is printed first."""
    if not indent_depth:
        sys.stdout.write("TREE " + "=" * 75 + "\n")
    fields = ["TREE:", " " * (indent_depth * 2), root_node, tree[root_node]]
    sys.stdout.write(" ".join(map(str, fields)) + "\n")
    for key, value in tree[root_node].items():
        # Keys starting with '/' are flags, not children.
        if key[0] != '/':
            print_node_tree(tree, value, indent_depth + 1)
def match_regexp_list(regexp_list, string):
    """Return 1 as soon as one of the compiled regexps in REGEXP_LIST
    matches STRING (using its match() method); return None if none does."""
    for pattern in regexp_list:
        if pattern.match(string):
            return 1
    return None
428 # These constants represent the log levels that this script supports
429 LOG_WARN = -1
430 LOG_QUIET = 0
431 LOG_NORMAL = 1
432 LOG_VERBOSE = 2
class Log:
    """A simple logging facility.  Each line will be timestamped if
    self.use_timestamps is true.  This class is a Borg, see
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
    __shared_state = {}

    def __init__(self):
        # Every instance shares one attribute dictionary, so the defaults
        # below are only installed by the first instance created.
        self.__dict__ = self.__shared_state
        if self.__dict__:
            return
        self.log_level = LOG_NORMAL
        # Set this to true if you want to see timestamps on each line output.
        self.use_timestamps = None
        self.logger = sys.stdout

    def _timestamp(self):
        """Output a detailed timestamp at the beginning of each line output."""
        # %M is minutes.  (This used to say %m, which is the month, so the
        # "minutes" field never changed within a run.)
        self.logger.write(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))

    def write(self, log_level, *args):
        """This is the public method to use for writing to a file.  Only
        messages whose LOG_LEVEL is <= self.log_level will be printed.  If
        there are multiple ARGS, they will be separated by a space."""
        if log_level > self.log_level:
            return
        if self.use_timestamps:
            self._timestamp()
        self.logger.write(' '.join(map(str, args)) + "\n")
class Cleanup:
    """Keeps track of the files cvs2svn creates and removes them again.

    When you first create a file, call register() with its name and the
    last pass that needs it.  cleanup() for that pass then runs the
    file's optional callback and deletes the file.  This class is a
    Borg, see
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

    __shared_state = {}

    def __init__(self):
        # All instances share one attribute dictionary; only the first
        # instance created sets up the (empty) bookkeeping tables.
        self.__dict__ = self.__shared_state
        if self.__dict__:
            return
        self._log = {}        # pass -> {filename: 1}
        self._callbacks = {}  # filename -> callback

    def register(self, file, which_pass, callback=None):
        """Register FILE for removal at the end of WHICH_PASS, running
        CALLBACK just before it is removed.

        Registering the same FILE for the same pass repeatedly is
        harmless.  Only the first callback ever given for a FILE is
        kept; later ones are ignored.  A database file must be closed
        before it is cleaned up, e.g. by its callback."""
        self._log.setdefault(which_pass, {})[file] = 1
        if callback:
            # setdefault() leaves an earlier callback for FILE in place.
            self._callbacks.setdefault(file, callback)

    def cleanup(self, which_pass):
        """Run the callbacks for, and delete, every file registered for
        pass WHICH_PASS.  Does nothing if no file was registered for it."""
        files = self._log.get(which_pass)
        if files is None:
            return
        for file in list(files.keys()):
            Log().write(LOG_VERBOSE, "Deleting", file)
            callback = self._callbacks.get(file)
            if callback is not None:
                callback()
            os.unlink(file)
505 # Always use these constants for opening databases.
506 DB_OPEN_READ = 'r'
507 DB_OPEN_NEW = 'n'
# A wrapper for anydbm that uses the marshal module to store items as
# strings.
class Database:
    """Dictionary-like store on top of anydbm: values are passed through
    marshal on the way in and out, keys are handed to anydbm unchanged."""

    def __init__(self, filename, mode):
        """Open the database FILENAME in MODE (as for anydbm.open())."""
        # pybsddb3 has a bug which prevents it from working with
        # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
        # causes the DB_TRUNCATE flag to be passed, which is disallowed
        # for databases protected by lock and transaction support
        # (bsddb databases use locking from bsddb version 4.2.4 onwards).
        #
        # Therefore, manually perform the removal (we can do this, because
        # we know that for bsddb - but *not* anydbm in general - the database
        # consists of one file with the name we specify, rather than several
        # based on that name).
        if mode == 'n' and anydbm._defaultmod.__name__ == 'dbhash':
            if os.path.isfile(filename):
                os.unlink(filename)
            mode = 'c'

        self.db = anydbm.open(filename, mode)

    def has_key(self, key):
        """Return whether KEY is present in the underlying database."""
        return self.db.has_key(key)

    def __getitem__(self, key):
        """Return the unmarshalled value stored under KEY."""
        stored = self.db[key]
        return marshal.loads(stored)

    def __setitem__(self, key, value):
        """Store the marshalled form of VALUE under KEY."""
        self.db[key] = marshal.dumps(value)

    def __delitem__(self, key):
        """Remove KEY from the underlying database."""
        del self.db[key]

    def get(self, key, default):
        """Return the value stored under KEY, or DEFAULT if KEY is absent."""
        try:
            return self[key]
        except KeyError:
            return default
class StatsKeeper:
    """Collects statistics about a conversion and keeps them in a file.

    The numbers live in self.data, a dictionary that archive() writes to
    the statistics file with marshal and unarchive() reads back.  All
    instances share their state (the attribute dictionary is shared), so
    the file is only looked at by the first instance created."""

    __shared_state = { }

    def __init__(self):
        self.__dict__ = self.__shared_state
        if self.__dict__:
            return
        self.filename = temp(STATISTICS_FILE)
        Cleanup().register(self.filename, pass8)
        # This can get kinda large, so we don't store it in our data dict.
        self.repos_files = { }

        if os.path.exists(self.filename):
            self.unarchive()
        else:
            self.data = { 'cvs_revs_count' : 0,
                          'tags': { },
                          'branches' : { },
                          'repos_size' : 0,
                          'repos_file_count' : 0,
                          'svn_rev_count' : None,
                          # 2**32: larger than any timestamp compared
                          # against it in record_c_rev() is expected to be.
                          'first_rev_date' : 4294967296,
                          'last_rev_date' : 0,
                          'pass_timings' : { },
                          'start_time' : 0,
                          'end_time' : 0,
                          }

    def log_duration_for_pass(self, duration, pass_num):
        """Remember that pass PASS_NUM took DURATION (see timings())."""
        self.data['pass_timings'][pass_num] = duration

    def set_start_time(self, start):
        """Record START as the start time of the run."""
        self.data['start_time'] = start

    def set_end_time(self, end):
        """Record END as the end time of the run."""
        self.data['end_time'] = end

    def _bump_item(self, key, amount=1):
        """Add AMOUNT to the counter stored under KEY."""
        self.data[key] = self.data[key] + amount

    def reset_c_rev_info(self):
        """Forget the revision count and the tag and branch names seen."""
        self.data['cvs_revs_count'] = 0
        self.data['tags'] = { }
        self.data['branches'] = { }

    def record_c_rev(self, c_rev):
        """Fold C_REV into the statistics: revision count, tag and branch
        names, first/last revision date, and repository size/file count."""
        self._bump_item('cvs_revs_count')

        for tag in c_rev.tags:
            self.data['tags'][tag] = None
        for branch in c_rev.branches:
            self.data['branches'][branch] = None

        if c_rev.timestamp < self.data['first_rev_date']:
            self.data['first_rev_date'] = c_rev.timestamp

        if c_rev.timestamp > self.data['last_rev_date']:
            self.data['last_rev_date'] = c_rev.timestamp

        # Only add the size if this is the first time we see the file.
        if c_rev.fname not in self.repos_files:
            self._bump_item('repos_size', c_rev.file_size)
        self.repos_files[c_rev.fname] = None

        self.data['repos_file_count'] = len(self.repos_files)

    def set_svn_rev_count(self, count):
        """Record COUNT as the number of svn revisions."""
        self.data['svn_rev_count'] = count

    def svn_rev_count(self):
        """Return the recorded number of svn revisions (None if unset)."""
        return self.data['svn_rev_count']

    def archive(self):
        """Write self.data to the statistics file."""
        # marshal output is binary data: use binary mode so that no
        # newline translation can corrupt it, and close the file promptly.
        f = open(self.filename, 'wb')
        try:
            f.write(marshal.dumps(self.data))
        finally:
            f.close()

    def unarchive(self):
        """Replace self.data with the contents of the statistics file."""
        f = open(self.filename, 'rb')
        try:
            self.data = marshal.loads(f.read())
        finally:
            f.close()

    def __str__(self):
        """Return the statistics as a multi-line, human-readable report."""
        svn_revs_str = ""
        if self.data['svn_rev_count'] is not None:
            svn_revs_str = ('Total SVN Commits: %10s\n'
                            % self.data['svn_rev_count'])

        return ('\n'
                'cvs2svn Statistics:\n'
                '------------------\n'
                'Total CVS Files: %10i\n'
                'Total CVS Revisions: %10i\n'
                'Total Unique Tags: %10i\n'
                'Total Unique Branches: %10i\n'
                'CVS Repos Size in KB: %10i\n'
                '%s'
                'First Revision Date: %s\n'
                'Last Revision Date: %s\n'
                '------------------'
                % (self.data['repos_file_count'],
                   self.data['cvs_revs_count'],
                   len(self.data['tags']),
                   len(self.data['branches']),
                   (self.data['repos_size'] / 1024),
                   svn_revs_str,
                   time.ctime(self.data['first_rev_date']),
                   time.ctime(self.data['last_rev_date']),
                   ))

    def timings(self):
        """Return a report with one line per recorded pass, in pass
        order, followed by the total run time; durations are shown in
        whole seconds."""
        passes = list(self.data['pass_timings'].keys())
        passes.sort()

        def desc(val):
            if val == 1: return "second"
            return "seconds"

        report = ['Timings:\n------------------\n']
        for pass_num in passes:
            duration = int(self.data['pass_timings'][pass_num])
            report.append('pass %d:%6d %s\n'
                          % (pass_num, duration, desc(duration)))

        total = int(self.data['end_time'] - self.data['start_time'])
        report.append('total: %6d %s' % (total, desc(total)))
        return ''.join(report)
class LastSymbolicNameDatabase:
    """Passing every CVSRevision in s-revs to this class will result in
    a Database whose key is the last CVS Revision a symbolic name was
    seen in, and whose value is a list of all symbolic names that were
    last seen in that revision."""

    def __init__(self, mode):
        """Open the backing database in MODE (as for Database)."""
        self.symbols = {}
        self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
        Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

    # Once we've gone through all the revs,
    # symbols.keys() will be a list of all tags and branches, and
    # their corresponding values will be a key into the last CVS revision
    # that they were used in.
    def log_revision(self, c_rev):
        """Note C_REV as the latest revision seen for each of its tags
        and, unless C_REV is a deletion, for each of its branches."""
        # Gather last CVS Revision for symbolic name info and tag info
        for tag in c_rev.tags:
            self.symbols[tag] = c_rev.unique_key()
        # Compare the op code by value: identity ('is') only happens to
        # work when both strings are the very same object.
        if c_rev.op != OP_DELETE:
            for branch in c_rev.branches:
                self.symbols[branch] = c_rev.unique_key()

    # Creates an inversion of symbols above--a dictionary of lists (key
    # = CVS rev unique_key: val = list of symbols that close in that
    # rev.
    def create_database(self):
        """Write the inverted mapping (revision key -> list of symbols)
        into the backing database."""
        for sym, rev_unique_key in self.symbols.items():
            # Values come back as copies, so the grown list has to be
            # stored again.
            ary = self.symbol_revs_db.get(rev_unique_key, [])
            ary.append(sym)
            self.symbol_revs_db[rev_unique_key] = ary
class CVSRevisionDatabase:
    """Stores CVSRevision objects, in their string form, under their
    unique_key() and rebuilds them on request."""

    def __init__(self, mode):
        """Open the underlying database in MODE (the same MODE values as
        Database or anydbm.open() accept)."""
        db_path = temp(CVS_REVS_DB)
        self.cvs_revs_db = Database(db_path, mode)
        Cleanup().register(db_path, pass8)

    def log_revision(self, c_rev):
        """Store the CVSRevision C_REV in the database."""
        line = str(c_rev)
        self.cvs_revs_db[c_rev.unique_key()] = line

    def get_revision(self, unique_key):
        """Return a CVSRevision built from the entry stored under
        UNIQUE_KEY."""
        line = self.cvs_revs_db[unique_key]
        return CVSRevision(Ctx(), line)
class TagsDatabase(Database):
    """A Database recording which symbolic names are tags.

    Every key is a tag name.  The values carry no information and
    should be set to None."""

    def __init__(self, mode):
        """Open the tags database in MODE and register it for cleanup."""
        db_path = temp(TAGS_DB)
        Database.__init__(self, db_path, mode)
        Cleanup().register(db_path, pass8)
736 class CVSRevision:
737 def __init__(self, ctx, *args):
738 """Initialize a new CVSRevision with Ctx object CTX, and ARGS.
740 If CTX is None, the following members and methods of the
741 instantiated CVSRevision class object will be unavailable (or
742 simply will not work correctly, if at all):
743 cvs_path
744 svn_path
745 svn_trunk_path
746 is_default_branch_revision()
748 (Note that this class treats CTX as const, because the caller
749 likely passed in a Borg instance of a Ctx. The reason this class
750 takes CTX as as a parameter, instead of just instantiating a Ctx
751 itself, is that this class should be usable outside cvs2svn.)
753 If there is one argument in ARGS, it is a string, in the format of
754 a line from a revs file. Do *not* include a trailing newline.
756 If there are multiple ARGS, there must be 16 of them,
757 comprising a parsed revs line:
758 timestamp --> (int) date stamp for this cvs revision
759 digest --> (string) digest of author+logmsg
760 prev_timestamp --> (int) date stamp for the previous cvs revision
761 op --> (char) OP_ADD, OP_CHANGE, or OP_DELETE
762 prev_rev --> (string or None) previous CVS rev, e.g., "1.2"
763 rev --> (string) this CVS rev, e.g., "1.3"
764 next_rev --> (string or None) next CVS rev, e.g., "1.4"
765 file_in_attic --> (char or None) true if RCS file is in Attic
766 file_executable --> (char or None) true if RCS file has exec bit set.
767 file_size --> (int) size of the RCS file
768 deltatext_code --> (char) 'N' if non-empty deltatext, else 'E'
769 mode --> (string or None) "kkv", "kb", etc.
770 branch_name --> (string or None) branch on which this rev occurred
771 tags --> (list of strings) all tags on this revision
772 branches --> (list of strings) all branches rooted in this rev
773 fname --> (string) relative path of file in CVS repos
775 The two forms of initialization are equivalent."""
777 self._ctx = ctx
778 if len(args) == 16:
779 (self.timestamp, self.digest, self.prev_timestamp, self.op,
780 self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
781 self.file_executable, self.file_size, self.deltatext_code, self.fname,
782 self.mode, self.branch_name, self.tags, self.branches) = args
783 elif len(args) == 1:
784 data = args[0].split(' ', 14)
785 self.timestamp = int(data[0], 16)
786 self.digest = data[1]
787 if data[2] == "*":
788 self.prev_timestamp = 0
789 else:
790 self.prev_timestamp = int(data[2])
791 self.op = data[3]
792 self.prev_rev = data[4]
793 if self.prev_rev == "*":
794 self.prev_rev = None
795 self.rev = data[5]
796 self.next_rev = data[6]
797 if self.next_rev == "*":
798 self.next_rev = None
799 self.file_in_attic = data[7]
800 if self.file_in_attic == "*":
801 self.file_in_attic = None
802 self.file_executable = data[8]
803 if self.file_executable == "*":
804 self.file_executable = None
805 self.file_size = int(data[9])
806 self.deltatext_code = data[10]
807 self.mode = data[11]
808 if self.mode == "*":
809 self.mode = None
810 self.branch_name = data[12]
811 if self.branch_name == "*":
812 self.branch_name = None
813 ntags = int(data[13])
814 tags = data[14].split(' ', ntags + 1)
815 nbranches = int(tags[ntags])
816 branches = tags[ntags + 1].split(' ', nbranches)
817 self.fname = branches[nbranches]
818 self.tags = tags[:ntags]
819 self.branches = branches[:nbranches]
820 else:
821 raise TypeError, 'CVSRevision() takes 2 or 16 arguments (%d given)' % \
822 (len(args) + 1)
823 if ctx is not None:
824 self.cvs_path = relative_name(self._ctx.cvsroot, self.fname[:-2])
825 self.svn_path = self._make_path(self.cvs_path, self.branch_name)
826 self.svn_trunk_path = self._make_path(self.cvs_path)
828 # The 'primary key' of a CVS Revision is the revision number + the
829 # filename. To provide a unique key (say, for a dict), we just glom
830 # them together in a string. By passing in self.prev_rev or
831 # self.next_rev, you can get the unique key for their respective
832 # CVSRevisions.
833 def unique_key(self, revnum=None):
834 if revnum is None:
835 revnum = self.rev
836 return revnum + "/" + self.fname
838 def __str__(self):
839 return ('%08lx %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s' % (
840 self.timestamp, self.digest, self.prev_timestamp or "*", self.op,
841 (self.prev_rev or "*"), self.rev, (self.next_rev or "*"),
842 (self.file_in_attic or "*"), (self.file_executable or "*"),
843 self.file_size,
844 self.deltatext_code, (self.mode or "*"), (self.branch_name or "*"),
845 len(self.tags), self.tags and " " or "", " ".join(self.tags),
846 len(self.branches), self.branches and " " or "", " ".join(self.branches),
847 self.fname, ))
849 # Returns true if this CVSRevision is the opening CVSRevision for
850 # NAME (for this RCS file).
851 def opens_symbolic_name(self, name):
852 if name in self.tags:
853 return 1
854 if name in self.branches:
855 # If this c_rev opens a branch and our op is OP_DELETE, then
856 # that means that the file that this c_rev belongs to was
857 # created on the branch, so for all intents and purposes, this
858 # c_rev is *technically* not an opening. See Issue #62 for more
859 # information.
860 if self.op != OP_DELETE:
861 return 1
862 return 0
def is_default_branch_revision(self):
  """Return 1 if SELF.rev of SELF.cvs_path is a default branch
  revision according to DEFAULT_BRANCHES_DB (see the conditions
  documented there), else return None.

  Concretely: the db must hold a revision for SELF.cvs_path, SELF.rev
  must lie on the same branch as that recorded revision, and its last
  component must not exceed the recorded one."""
  default_branches = self._ctx._default_branches_db
  if not default_branches.has_key(self.cvs_path):
    return None

  recorded_rev = default_branches[self.cvs_path]
  recorded_dot = recorded_rev.rindex(".")
  own_dot = self.rev.rindex(".")

  # Split both revision numbers into "branch" and "last component".
  recorded_branch = recorded_rev[:recorded_dot]
  own_branch = self.rev[:own_dot]
  recorded_component = int(recorded_rev[recorded_dot + 1:])
  own_component = int(self.rev[own_dot + 1:])

  if recorded_branch != own_branch:
    return None
  if own_component > recorded_component:
    return None
  return 1
def _make_path(self, path, branch_name = None):
  """Return the trunk path or branch path for PATH.

  With a (non-empty) BRANCH_NAME the result lives under the branches
  base, in a directory named after the cleaned-up branch name;
  otherwise it lives under the trunk base.

  If PATH is None, return None."""
  # History of this layout decision:
  #
  # For a while, each top-level subdir of the CVS repository was
  # treated as a "project root", and the appropriate genealogy
  # (trunk|tag|branch) was interpolated according to the official
  # recommended layout.  The path '/foo/bar/baz.c' on branch 'Rel2'
  # became
  #
  #   /foo/branches/Rel2/bar/baz.c
  #
  # and on trunk it became
  #
  #   /foo/trunk/bar/baz.c
  #
  # We then went back to the older and simpler method of just
  # prepending the genealogy to the front, so now we produce:
  #
  #   /branches/Rel2/foo/bar/baz.c
  #   /trunk/foo/bar/baz.c
  #
  # Why?  Jack Repenning pointed out that this is much friendlier to
  # "anonymously rooted subtrees" (trees where the name of the top
  # level dir doesn't matter; the point is that if you cd into it and,
  # say, run 'make', something good will happen).  Interpolation made
  # it impossible to point cvs2svn at some subdir in the CVS
  # repository and convert it as a project, because every subdir
  # underneath it was treated as an independent project root, which
  # is probably not what the user wanted.
  #
  # Also, see Blair Zajac's post
  #
  #   http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=38965
  #
  # and the surrounding thread, for why what people really want is a
  # way of specifying an in-repository prefix path, not interpolation.
  if path is None:
    return None

  if not branch_name:
    return self._ctx.trunk_base + '/' + path

  clean_name = _clean_symbolic_name(branch_name)
  return self._ctx.branches_base + '/' + clean_name + '/' + path
def rcs_path(self):
  """Return the actual filesystem path to the RCS file of this
  CVSRevision.

  When SELF.file_in_attic is set, an 'Attic' directory component is
  inserted in front of the file name of SELF.fname; otherwise
  SELF.fname is returned unchanged."""
  if self.file_in_attic is not None:
    directory, rcs_file = os.path.split(self.fname)
    return os.path.join(directory, 'Attic', rcs_file)
  return self.fname
def filename(self):
  "Return the last path component of self.fname, minus the ',v'"
  rcs_file = os.path.basename(self.fname)
  # Chop the two-character ',v' suffix.
  return rcs_file[:-2]
class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""

  def __init__(self):
    # A hash that maps tag names to the number of times the creation
    # of that tag has been registered.
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    self.tags[name] = self.tags.get(name, 0) + 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    if name not in self.branches:
      self.branches[name] = [ 0, 0, { } ]
    return self.branches[name]

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Return a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags:
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches:
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """Find all blockers of BRANCH, excluding the ones in the hash
    EXCLUDES.  If BRANCH itself is not in EXCLUDES, nothing blocks it
    and the returned hash is empty."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so
    the values are not used."""
    blocked_branches = { }
    for branch in self.branches:
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches:
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files.

    The tags file holds one 'NAME COUNT' line per tag.  The branches
    file holds one 'NAME CREATE_COUNT COMMIT_COUNT [BLOCKER ...]' line
    per branch."""
    f = open(temp(TAGS_LIST))
    # Close the handle explicitly instead of relying on garbage
    # collection to do it.
    try:
      for line in f:
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST))
    try:
      for line in f:
        words = line.split()
        blockers = { }
        for blocker in words[3:]:
          blockers[blocker] = None
        self.branches[words[0]] = [ int(words[1]), int(words[2]), blockers ]
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files, in the format that read()
    expects."""
    f = open(temp(TAGS_LIST), "w")
    # Make sure the data is flushed and the handle released even if
    # writing fails part-way.
    try:
      Cleanup().register(temp(TAGS_LIST), pass2)
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    try:
      Cleanup().register(temp(BRANCHES_LIST), pass2)
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()
1071 class CollectData(cvs2svn_rcsparse.Sink):
def __init__(self):
  """Create the output files and databases that data collection
  writes to, and reset the counters that span all RCS files.

  Each temporary file is registered with Cleanup() together with a
  pass (presumably the pass after which it is no longer needed --
  see Cleanup)."""
  # Text file receiving one line per CVSRevision.
  self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)

  # Text file receiving one line per revision whose timestamp had to
  # be changed.
  self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)

  self.default_branches_db = Database(temp(DEFAULT_BRANCHES_DB), DB_OPEN_NEW)
  Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)

  self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
  Cleanup().register(temp(METADATA_DB), pass8)

  self.fatal_errors = []
  self.num_files = 0
  self.symbol_db = SymbolDatabase()

  # 1 if we've collected data for at least one file, None otherwise.
  self.found_valid_file = None

  # See set_fname() for initializations of other variables.
def set_fname(self, canonical_name, filename):
  """Prepare to receive data for FILENAME, resetting all per-file
  state.

  FILENAME is the absolute filesystem path to the file in question,
  and CANONICAL_NAME is FILENAME with the 'Attic' component removed
  (if the file is indeed in the Attic)."""
  self.fname = canonical_name

  # File metadata is calculated and saved here, where it is done only
  # once per file, instead of later where the same calculations would
  # have to be repeated once per CVS *revision*.

  # Path relative to the cvsroot, without the trailing ',v'.
  self.rel_name = relative_name(Ctx().cvsroot, self.fname)[:-2]

  # If the two paths differ, the canonical name has had the 'Attic'
  # component stripped out.
  if canonical_name != filename:
    self.file_in_attic = 1
  else:
    self.file_in_attic = None

  file_stat = os.stat(filename)

  # The size of our file in bytes.
  self.file_size = file_stat[stat.ST_SIZE]

  # Whether or not the (owner) executable bit is set.
  if file_stat[stat.ST_MODE] & stat.S_IXUSR:
    self.file_executable = 1
  else:
    self.file_executable = None

  # revision -> [timestamp, author, old-timestamp]
  self.rev_data = { }

  # Maps a revision number (key) to the revision number of the
  # previous revision along this line of development.
  #
  # For the first revision R on a branch, the revision from which R
  # sprouted is considered to be the 'previous' one.
  #
  # This cannot be determined arithmetically (because of
  # cvsadmin -o), which is why the mapping is necessary.
  self.prev_rev = { }

  # Essentially self.prev_rev with the values mapped in the other
  # direction: following key -> value yields the next revision number.
  self.next_rev = { }

  # The state of each revision, so that set_revision_info can tell
  # whether an op is an add/change/delete.  By then all revisions of
  # the file are at our fingertips, and the state of the prev_rev is
  # what distinguishes an add from a change.
  self.rev_state = { }

  # Hash mapping branch numbers, like '1.7.2', to branch names,
  # like 'Release_1_0_dev'.
  self.branch_names = { }

  # RCS flags (used for keyword expansion).
  self.mode = None

  # Hash mapping revision numbers, like '1.7', to lists of names
  # indicating which branches sprout from that revision, like
  # ['Release_1_0_dev', 'experimental_driver', ...].
  self.branchlist = { }

  # Like self.branchlist, but the values are lists of tag names that
  # apply to the key revision.
  self.taglist = { }

  # If set, this is an RCS branch number -- rcsparse calls this the
  # "principal branch", but CVS and RCS refer to it as the "default
  # branch", so that's what we call it, even though the rcsparse API
  # setter method is still 'set_principal_branch'.
  self.default_branch = None

  # If the RCS file doesn't have a default branch anymore, but does
  # have vendor revisions, then we make an educated guess that those
  # revisions *were* the head of the default branch up until the
  # commit of 1.2, at which point the file's default branch became
  # trunk.  This records the date at which 1.2 was committed.
  self.first_non_vendor_revision_date = None

  # A list of all symbols defined for the current file.  Used to
  # prevent multiple definitions of a symbol, something which can
  # easily happen when --symbol-transform is used.
  self.defined_symbols = [ ]
def set_principal_branch(self, branch):
  """Remember BRANCH as the default branch of the current file.

  This is the setter called 'set_principal_branch' by the rcsparse
  API; CVS and RCS call the same thing the "default branch"."""
  self.default_branch = branch
def set_expansion(self, mode):
  """Remember MODE, the RCS flags (used for keyword expansion) of the
  current file."""
  self.mode = mode
def set_branch_name(self, branch_number, name):
  """Record that BRANCH_NUMBER is the branch number for branch NAME,
  and that NAME sprouts from the revision BRANCH_NUMBER is rooted at.

  BRANCH_NUMBER is an RCS branch number with an odd number of
  components, for example '1.7.2' (never '1.7.0.2').

  If BRANCH_NUMBER already has a name, write a warning to stderr and
  ignore NAME."""
  if branch_number in self.branch_names:
    sys.stderr.write("%s: in '%s':\n"
                     " branch '%s' already has name '%s',\n"
                     " cannot also have name '%s', ignoring the latter\n"
                     % (warning_prefix, self.fname, branch_number,
                        self.branch_names[branch_number], name))
    return

  self.branch_names[branch_number] = name

  # The branchlist is keyed on the revision number from which the
  # branch sprouts, so strip off the odd final component.
  sprout_rev = branch_number[:branch_number.rfind(".")]
  self.branchlist.setdefault(sprout_rev, []).append(name)
  self.symbol_db.register_branch_creation(name)
def rev_to_branch_name(self, revision):
  """Return the name of the branch on which REVISION lies.

  REVISION is a non-branch revision number with an even number of
  components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
  For the convenience of callers, REVISION can also be a trunk
  revision such as '1.2', in which case just return None.  None is
  also returned when the branch has no recorded name."""
  if not trunk_rev.match(revision):
    # Dropping the last component leaves the branch number.
    branch_number = revision[:revision.rindex(".")]
    return self.branch_names.get(branch_number)
  return None
def add_cvs_branch(self, revision, branch_name):
  """Record the root revision and branch revision for BRANCH_NAME,
  based on REVISION.

  REVISION is a CVS branch number having an even number of components
  where the second-to-last is '0'.  For example, if it's '1.7.0.2',
  then record that BRANCH_NAME sprouts from 1.7 and has branch number
  1.7.2."""
  # Cut REVISION in two at its last dot: '1.7.0' and '.2' ...
  final_dot = revision.rfind(".")
  head = revision[:final_dot]
  final_component = revision[final_dot:]
  # ... then drop the last component of the head (the '0'), which
  # leaves the sprout revision '1.7'.
  sprout_rev = head[:head.rfind(".")]
  self.set_branch_name(sprout_rev + final_component, branch_name)
def define_tag(self, name, revision):
  """Record a bidirectional mapping between symbolic NAME and REVISION.

  REVISION is an unprocessed revision number from the RCS file's
  header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
  This function will determine what kind of symbolic name it is by
  inspection, and record it in the right places.

  NAME is first run through the configured symbol transforms.  A
  symbol that ends up being defined more than once in the current
  file is reported on stderr and added to SELF.fatal_errors, but is
  still recorded."""
  # Apply every transform in turn, each one to the result of the
  # previous one.
  for (pattern, replacement) in Ctx().symbol_transforms:
    transformed = re.sub(pattern, replacement, name)
    if transformed == name:
      continue
    Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
                % (name, transformed))
    name = transformed

  if name in self.defined_symbols:
    err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
          % (error_prefix, name, self.fname)
    sys.stderr.write(err + "\n")
    self.fatal_errors.append(err)
  self.defined_symbols.append(name)

  if branch_tag.match(revision):
    # A CVS branch number such as '1.7.0.2'.
    self.add_cvs_branch(revision, name)
    return
  if vendor_tag.match(revision):
    # A vendor branch number such as '1.1.1'.
    self.set_branch_name(revision, name)
    return

  # Anything else is a plain tag on REVISION.
  self.taglist.setdefault(revision, []).append(name)
  self.symbol_db.register_tag_creation(name)
def define_revision(self, revision, timestamp, author, state,
                    branches, next):
  """Record the tree data of one revision of the current file.

  Stores REVISION's STATE, TIMESTAMP and AUTHOR, links it to its
  neighbours (NEXT, and the first revisions of BRANCHES), keeps the
  default branches db up to date, and registers the commit with the
  branch REVISION lies on, naming that branch 'unlabeled-<number>'
  if it has no name yet."""
  # Record the state of our revision for later calculations.
  self.rev_state[revision] = state

  # Store the rev_data as a list in case we have to jigger the
  # timestamp.
  self.rev_data[revision] = [int(timestamp), author, None]

  # On trunk, the RCS 'next' revision number points to what humans
  # might consider to be the 'previous' revision number: 1.3's RCS
  # 'next' is 1.2.
  #
  # On a branch, however, the RCS 'next' really does point to what
  # humans would consider the 'next' revision: 1.1.2.1's RCS 'next'
  # would be 1.1.2.2.
  #
  # In other words, in RCS, 'next' always means "where to find the
  # next deltatext that you need this revision to retrieve".
  #
  # We don't *want* RCS's behavior here, so we determine whether
  # we're on trunk or a branch and set self.prev_rev accordingly.
  #
  # Note that for a branch REVISION, instead of mapping REVISION to
  # NEXT, NEXT is mapped to REVISION.  Since all revisions in the
  # file are looped over before anything is done with the data
  # gathered here, this 'reverse assignment' effectively:
  #
  # 1. Gives us no 'prev' value for REVISION (in this iteration... it
  #    may have been set in a previous iteration).
  #
  # 2. Sets the 'prev' value for the revision with number NEXT to
  #    REVISION, so that when we come around to the branch revision
  #    whose revision value is NEXT, its 'prev' and 'prev_rev' are
  #    already set.
  if trunk_rev.match(revision):
    self.prev_rev[revision] = next
    self.next_rev[next] = revision
  elif next:
    self.prev_rev[next] = revision
    self.next_rev[revision] = next

  # The first revision of every branch sprouting here has REVISION as
  # its previous revision.
  for sprouted_rev in branches:
    self.prev_rev[sprouted_rev] = revision

  # Ratchet up the highest vendor head revision, if necessary.
  if self.default_branch:
    branch_prefix = self.default_branch + "."
    on_default_branch = (revision.startswith(branch_prefix)
                         and branch_prefix.count('.') == revision.count('.'))
    if on_default_branch:
      # This revision is on the default branch, so record that it is
      # the new highest default branch head revision.
      self.default_branches_db[self.rel_name] = revision
  elif revision == '1.2':
    # No default branch, so make an educated guess: this is probably
    # the time when the file stopped having a default branch, so make
    # a note of it.
    self.first_non_vendor_revision_date = timestamp
  elif vendor_revision.match(revision) \
       and ((not self.first_non_vendor_revision_date)
            or (timestamp < self.first_non_vendor_revision_date)):
    # We're looking at a vendor revision, and it wasn't committed
    # after this file lost its default branch, so bump the maximum
    # trunk vendor revision in the permanent record.
    self.default_branches_db[self.rel_name] = revision

  if trunk_rev.match(revision):
    return

  # Check for unlabeled branches, record them.  All branch names in
  # the symbolic name header were collected earlier, of course, but
  # that didn't catch unlabeled branches.  If a branch is unlabeled,
  # this is our first encounter with it, so its data has to be
  # recorded now.
  branch_number = revision[:revision.rindex(".")]
  if branch_number not in self.branch_names:
    self.set_branch_name(branch_number, "unlabeled-" + branch_number)

  # Register the commit on this non-trunk branch.
  self.symbol_db.register_branch_commit(self.branch_names[branch_number])
def tree_completed(self):
  "The revision tree has been parsed.  Analyze it for consistency."

  # Our algorithm depends upon the timestamps on the revisions
  # occurring monotonically over time.  That is, we want to see rev
  # 1.34 occur in time before rev 1.35.  If we inserted 1.35 *first*
  # (due to the time-sorting), and then tried to insert 1.34, we'd be
  # screwed.
  #
  # To perform the analysis, all recorded 'previous' links are
  # visited, validating that the timestamp on the previous revision is
  # before that of the revision itself.
  #
  # Whenever some nodes had to be resynced the scan is restarted, for
  # as long as necessary.
  while 1:
    resynced = 0
    for child, parent in self.prev_rev.items():
      if not parent:
        # no previous revision exists (i.e. the initial revision)
        continue
      child_time = self.rev_data[child][0]
      parent_time = self.rev_data[parent][0]
      if parent_time < child_time:
        continue

      # The previous revision occurred later than (or together with)
      # the current revision.  Shove the previous revision back in
      # time (and any before it that may need to shift).
      #
      # We sync backwards and not forwards because any given CVS
      # Revision has only one previous revision.  However, a CVS
      # Revision can *be* a previous revision for many other revisions
      # (e.g., a revision that is the source of multiple branches).
      # This becomes relevant for the secondary synchronization in
      # pass 2 -- we can make certain that we don't resync a revision
      # earlier than its previous revision, but it would be
      # non-trivial to make sure that we don't resync revision R
      # *after* any revisions that have R as a previous revision.
      while parent_time >= child_time:
        new_time = child_time - 1
        self.rev_data[parent][0] = new_time      # new timestamp
        self.rev_data[parent][2] = parent_time   # old timestamp
        delta = new_time - parent_time
        msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (self.rel_name, parent, time.ctime(parent_time), delta)
        Log().write(LOG_VERBOSE, msg)
        if abs(delta) > COMMIT_THRESHOLD:
          warning = "%s: Significant timestamp change for '%s' (%d seconds)"
          Log().write(LOG_WARN, warning % (warning_prefix, self.rel_name,
                                           delta))
        # Step one link further back along the line of development.
        child = parent
        parent = self.prev_rev[child]
        if not parent:
          break
        child_time = new_time
        parent_time = self.rev_data[parent][0]

      # Timestamps changed under our feet: abandon this scan.
      resynced = 1
      break

    if not resynced:
      # A complete scan found nothing to resync.
      return
def set_revision_info(self, revision, log, text):
  """Write out the CVSRevision for REVISION of the current file.

  LOG is the revision's log message and TEXT its deltatext.  A line
  describing the revision is appended to the revs file, the revision
  is passed to the StatsKeeper, and the (author, log) pair is stored
  in the metadata db under the digest of the two.  If the revision's
  timestamp was changed by tree_completed(), a line is also written
  to the resync file."""
  timestamp, author, old_ts = self.rev_data[revision]
  digest = sha.new(log + '\0' + author).hexdigest()
  if old_ts:
    # the timestamp on this revision was changed.  log it for later
    # resynchronization of other files's revisions that occurred
    # for this time and log message.
    self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

  # "...Give back one kadam to honor the Hebrew God whose Ark this is."
  #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
  #
  # If revision 1.1 appears to have been created via 'cvs add'
  # instead of 'cvs import', then this file probably never had a
  # default branch, so retroactively remove its record in the
  # default branches db.  The test is that the log message CVS uses
  # for 1.1 in imports is "Initial revision\n" with no period.
  if revision == '1.1' and log != 'Initial revision\n':
    if self.default_branches_db.has_key(self.rel_name):
      del self.default_branches_db[self.rel_name]

  # Look the previous revision up exactly once, tolerating a missing
  # entry, and use that one value everywhere below.  (Indexing
  # self.prev_rev directly would raise KeyError in precisely the "no
  # previous revision" case that the op logic caters for.)
  prev_rev = self.prev_rev.get(revision, None)

  # Get the timestamp of the previous revision (0 if there is none).
  prev_timestamp = self.rev_data.get(prev_rev, [0, None, None])[0]

  # How to tell if a CVSRevision is an add, a change, or a deletion:
  #
  # It's a delete if RCS state is 'dead'
  #
  # It's an add if RCS state is not 'dead' and
  #     - we either have no previous revision
  #   or
  #     - we have a previous revision whose state is 'dead'
  #
  # Anything else is a change.
  if self.rev_state[revision] == 'dead':
    op = OP_DELETE
  elif prev_rev is None or self.rev_state[prev_rev] == 'dead':
    op = OP_ADD
  else:
    op = OP_CHANGE

  if text:
    deltatext_code = DELTATEXT_NONEMPTY
  else:
    deltatext_code = DELTATEXT_EMPTY

  c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp, op,
                      prev_rev, revision,
                      self.next_rev.get(revision),
                      self.file_in_attic, self.file_executable,
                      self.file_size,
                      deltatext_code, self.fname,
                      self.mode, self.rev_to_branch_name(revision),
                      self.taglist.get(revision, []),
                      self.branchlist.get(revision, []))
  self.revs.write(str(c_rev) + "\n")
  StatsKeeper().record_c_rev(c_rev)

  # Only the first (author, log) pair seen for a digest is stored.
  if not self.metadata_db.has_key(digest):
    self.metadata_db[digest] = (author, log)
def parse_completed(self):
  """Finish processing of the current file.

  Walk through all tags and branches of the file and register each
  one as a blocker of the branch its revision lies on (symbols on
  trunk block nothing), then count the file in SELF.num_files."""
  # Tags first, then branches.  The two tables are visited one after
  # the other instead of concatenating their item lists.
  for symbols_by_rev in (self.taglist, self.branchlist):
    for revision, symbols in symbols_by_rev.items():
      # The parent branch depends on the revision only, so look it up
      # once per revision rather than once per symbol.
      parent_branch = self.rev_to_branch_name(revision)
      if parent_branch is None:
        continue
      for symbol in symbols:
        self.symbol_db.register_branch_blocker(parent_branch, symbol)

  self.num_files = self.num_files + 1
def write_symbol_db(self):
  """Store the symbol database that was built up while collecting
  data (delegates to its write() method)."""
  symbol_db = self.symbol_db
  symbol_db.write()
class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and
  closings.

  This data will later be used to determine valid SVNRevision ranges
  from which a file can be copied when creating a branch or tag in
  Subversion.  Do this by finding "Openings" and "Closings" for each
  file copied onto a branch or tag.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  obviously sprouts from revision 1.2.  Therefore, 1.2 is the opening
  for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
  'foo.c'.  Note that there may be many revisions chronologically
  between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
  perhaps even including on branch BEE itself.  But 1.3 is the next
  revision *on the same line* as 1.2, that is why it is the closing
  revision for those symbolic names of which 1.2 is the opening.

  The reason for doing all this hullabaloo is to make branch and tag
  creation as efficient as possible by minimizing the number of copies
  and deletes per creation.  For example, revisions 1.2 and 1.3 of
  foo.c might correspond to revisions 17 and 30 in Subversion.  That
  means that when creating branch BEE, there is some motivation to do
  the copy from one of 17-30.  Now if there were another file,
  'bar.c', whose opening and closing CVSRevisions for BEE corresponded
  to revisions 24 and 39 in Subversion, we would know that the ideal
  thing would be to copy the branch from somewhere between 24 and 29,
  inclusive.
  """

  def __init__(self):
    """Open the symbolings file and the temporary closings file for
    writing, and start with no open default-branch paths."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
    self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
    Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

    # The keys of this dictionary are Subversion repository *source*
    # paths for which we've encountered an 'opening'.  The values are
    # lists of the symbolic names that this path has opened.  The only
    # paths that should be in this dict are paths whose corresponding
    # CVSRevision is a default branch revision.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log any openings found in C_REV, and if C_REV.next_rev is not
    None, a closing.  The opening uses SVN_REVNUM, but the closing (if
    any) will have its revnum determined later."""
    symbols = c_rev.tags + c_rev.branches
    has_closing = c_rev.next_rev is not None
    for symbol in symbols:
      clean_name = _clean_symbolic_name(symbol)
      self._note_default_branch_opening(c_rev, clean_name)
      if c_rev.op != OP_DELETE:
        self._log(clean_name, svn_revnum, c_rev.svn_path, OPENING)

      # If our c_rev has a next_rev, then that's the closing rev for
      # this source revision.  Log it to closings for later processing
      # since we don't know the svn_revnum yet.
      if has_closing:
        closing_key = c_rev.unique_key(c_rev.next_rev)
        self.closings.write('%s %s\n' % (clean_name, closing_key))

  def _log(self, name, svn_revnum, svn_path, type):
    """Write out a single line to the symbol_openings_closings file
    representing that svn_revnum of svn_path is either the opening or
    closing (TYPE) of NAME (a symbolic name).

    TYPE should only be one of the following global constants:
    OPENING or CLOSING."""
    # The revnum is zero-padded to 8 places, which gives us 99,999,999
    # SVN revs.  That *should* be enough.
    line = '%s %.8d %s %s\n' % (name, svn_revnum, type, svn_path)
    self.symbolings.write(line)

  def close(self):
    """Iterate through the closings file, lookup the svn_revnum for
    each closing CVSRevision, and write a proper line out to the
    symbolings file."""
    # Use this to get the c_rev.svn_path of our rev_key
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    # Everything must be on disk before the file is read back.
    self.closings.close()
    for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      # Each line is "NAME REV_KEY"; only the first space separates.
      (name, rev_key) = line.rstrip().split(" ", 1)
      svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

      c_rev = cvs_revs_db.get_revision(rev_key)
      self._log(name, svn_revnum, c_rev.svn_path, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """Remember C_REV.svn_trunk_path as an opening for SYMBOLIC_NAME,
    to be closed later by log_default_branch_closing().

    This is meant for default branch revisions, but the method itself
    does not check that C_REV is one."""
    trunk_path = c_rev.svn_trunk_path
    opened = self.open_paths_with_default_branches.setdefault(trunk_path, [ ])
    opened.append(symbolic_name)

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.svn_trunk_path, then log each name in
    self.open_paths_with_default_branches[C_REV.svn_trunk_path] as a
    closing with SVN_REVNUM as the closing revision number."""
    trunk_path = c_rev.svn_trunk_path
    if trunk_path not in self.open_paths_with_default_branches:
      return
    # log each symbol as a closing
    for name in self.open_paths_with_default_branches[trunk_path]:
      self._log(name, svn_revnum, trunk_path, CLOSING)
    # Remove them from the openings list as we're done with them.
    del self.open_paths_with_default_branches[trunk_path]
1595 class PersistenceManager:
1596 """The PersistenceManager allows us to effectively store SVNCommits
1597 to disk and retrieve them later using only their subversion revision
1598 number as the key. It also returns the subversion revision number
1599 for a given CVSRevision's unique key.
1601 All information pertinent to each SVNCommit is stored in a series of
1602 on-disk databases so that SVNCommits can be retrieved on-demand.
1604 MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
1605 In 'new' mode, PersistenceManager will initialize a new set of on-disk
1606 databases and be fully-featured.
1607 In 'read' mode, PersistenceManager will open existing on-disk databases
1608 and the set_* methods will be unavailable."""
def __init__(self, mode):
  """Open the on-disk databases behind this PersistenceManager.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ and is
  used to open the databases this class owns; any other value raises
  RuntimeError.  The metadata db, the CVSRevision db and the tags db
  are always opened for reading."""
  self.mode = mode
  if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
    raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

  # svn revnum -> keys of the CVSRevisions committed in it.
  self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
  Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)

  # CVSRevision unique key -> svn revnum.
  self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
  Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)

  self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES), mode)
  Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)

  # digest -> (author, log message)
  self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
  self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)

  ###PERF kff Elsewhere there are comments about sucking the tags db
  ### into memory.  That seems like a good idea.
  if not Ctx().trunk_only:
    self.tags_db = TagsDatabase(DB_OPEN_READ)
  # NOTE(review): the motivating revnums db is assumed to be opened
  # regardless of trunk_only -- confirm that these two statements are
  # not meant to be guarded by the condition above.
  self.motivating_revnums = Database(temp(MOTIVATING_REVNUMS), mode)
  Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)

  # "branch_name" -> svn_revnum in which branch was last filled.
  # This is used by CVSCommit._pre_commit, to prevent creating a fill
  # revision which would have nothing to do.
  self.last_filled = {}
def get_svn_revnum(self, cvs_rev_unique_key):
  """Return the Subversion revision number in which
  CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
  is no mapping for CVS_REV_UNIQUE_KEY."""
  stored_revnum = self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM)
  # Whatever the db hands back is converted to an int.
  return int(stored_revnum)
def get_svn_commit(self, svn_revnum):
  """Return an SVNCommit that corresponds to SVN_REVNUM.

  If no SVNCommit exists for revnum SVN_REVNUM, then return None.

  The returned commit carries every CVSRevision recorded for
  SVN_REVNUM, with author and log message taken from the metadata of
  the first of them.  Unless this is a trunk-only conversion, the
  symbolic name, date, tag flag and motivating revision number stored
  for SVN_REVNUM are restored as well.

  This method can throw SVNCommitInternalInconsistencyError, which
  happens when the stored data describes a commit that has both
  CVSRevisions and a symbolic name to fill."""
  c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
  if c_rev_keys is None:
    return None

  # Only build the SVNCommit once we know the revision exists, so
  # that a miss can never consume a number from SVNCommit.revnum.
  svn_commit = SVNCommit("Retrieved from disk", svn_revnum)

  digest = None
  for key in c_rev_keys:
    c_rev = self.cvs_revisions.get_revision(key)
    svn_commit.add_revision(c_rev)
    # Set the author and log message for this commit by using
    # CVSRevision metadata, but only if we haven't done so already.
    if digest is None:
      digest = c_rev.digest
      author, log_msg = self.svn_commit_metadata[digest]
      svn_commit.set_author(author)
      svn_commit.set_log_msg(log_msg)

  # If we're doing a trunk-only conversion, we don't need to do any
  # more work (and self.tags_db does not exist).
  if Ctx().trunk_only:
    return svn_commit

  name, date = self._get_name_and_date(svn_revnum)
  if name:
    svn_commit.set_symbolic_name(name)
    svn_commit.set_date(date)
    if self.tags_db.has_key(name):
      svn_commit.is_tag = 1

  motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
  if motivating_revnum:
    svn_commit.set_motivating_revnum(int(motivating_revnum))
    # add_revision() above may have moved the date; put back the
    # stored one.
    svn_commit.set_date(date)

  if len(svn_commit.cvs_revs) and name:
    # Spelled with an explicit newline so that the message does not
    # depend on how this source file happens to be indented.
    msg = ("An SVNCommit cannot have cvs_revisions *and* a\n"
           "corresponding symbolic name ('%s') to fill." % name)
    raise SVNCommit.SVNCommitInternalInconsistencyError(msg)

  return svn_commit
def set_cvs_revs(self, svn_revnum, cvs_revs):
  """Store the two-way association between SVN_REVNUM and the
  CVSRevisions in CVS_REVS.

  Each revision's unique key is written to the log at LOG_VERBOSE.
  Raises RuntimeError if this PersistenceManager was opened with
  DB_OPEN_READ."""
  if self.mode == DB_OPEN_READ:
    raise RuntimeError(
      'Write operation attempted on read-only PersistenceManager')

  for cvs_rev in cvs_revs:
    Log().write(LOG_VERBOSE, " ", cvs_rev.unique_key())

  # Forward direction: one revnum -> all of its CVS revision keys.
  forward_keys = []
  for cvs_rev in cvs_revs:
    forward_keys.append(cvs_rev.unique_key())
  self.svn2cvs_db[str(svn_revnum)] = forward_keys

  # Reverse direction: each CVS revision key -> the revnum.
  for cvs_rev in cvs_revs:
    self.cvs2svn_db[cvs_rev.unique_key()] = svn_revnum
def set_name_and_date(self, svn_revnum, name, date):
  """Record that SVN_REVNUM carries symbolic name NAME and date DATE,
  and note SVN_REVNUM in self.last_filled as the revision in which
  NAME was most recently filled.

  Raises RuntimeError if this PersistenceManager was opened with
  DB_OPEN_READ."""
  if self.mode == DB_OPEN_READ:
    raise RuntimeError(
      'Write operation attempted on read-only PersistenceManager')
  db_key = str(svn_revnum)
  self.svn_commit_names_dates[db_key] = (name, date)
  self.last_filled[name] = svn_revnum
1706 def _get_name_and_date(self, svn_revnum):
1707 """Return a tuple containing the symbolic name and date associated
1708 with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
1709 associated with it."""
1710 return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))
def set_motivating_revnum(self, svn_revnum, motivating_revnum):
  """Remember MOTIVATING_REVNUM as the motivating revision number of
  SVN_REVNUM.  Both numbers are stored in string form.

  Raises RuntimeError if this PersistenceManager was opened with
  DB_OPEN_READ."""
  if self.mode == DB_OPEN_READ:
    raise RuntimeError(
      'Write operation attempted on read-only PersistenceManager')
  db_key = str(svn_revnum)
  self.motivating_revnums[db_key] = str(motivating_revnum)
1720 class CVSCommit:
1721 """Each instance of this class contains a number of CVS Revisions
1722 that correspond to one or more Subversion Commits. After all CVS
1723 Revisions are added to the grouping, calling process_revisions will
1724 generate a Subversion Commit (or Commits) for the set of CVS
1725 Revisions in the grouping."""
1727 def __init__(self, digest, author, log):
1728 self.digest = digest
1729 self.author = author
1730 self.log = log
1732 # Symbolic names for which the last source revision has already
1733 # been seen and for which the CVSRevisionAggregator has already
1734 # generated a fill SVNCommit. See self.process_revisions().
1735 self.done_symbols = [ ]
1737 self.files = { }
1738 # Lists of CVSRevisions
1739 self.changes = [ ]
1740 self.deletes = [ ]
1742 # Start out with a t_min higher than any incoming time T, and a
1743 # t_max lower than any incoming T. This way the first T will
1744 # push t_min down to T, and t_max up to T, naturally (without any
1745 # special-casing), and successive times will then ratchet them
1746 # outward as appropriate.
1747 self.t_min = 1L<<32
1748 self.t_max = 0
1750 # This will be set to the SVNCommit that occurs in self._commit.
1751 self.motivating_commit = None
1753 # This is a list of all non-primary commits motivated by the main
1754 # commit. We gather these so that we can set their dates to the
1755 # same date as the primary commit.
1756 self.secondary_commits = [ ]
1758 # State for handling default branches.
1760 # Here is a tempting, but ultimately nugatory, bit of logic, which
1761 # I share with you so you may appreciate the less attractive, but
1762 # refreshingly non-nugatory, logic which follows it:
1764 # If some of the commits in this txn happened on a non-trunk
1765 # default branch, then those files will have to be copied into
1766 # trunk manually after being changed on the branch (because the
1767 # RCS "default branch" appears as head, i.e., trunk, in practice).
1768 # As long as those copies don't overwrite any trunk paths that
1769 # were also changed in this commit, then we can do the copies in
1770 # the same revision, because they won't cover changes that don't
1771 # appear anywhere/anywhen else. However, if some of the trunk dst
1772 # paths *did* change in this commit, then immediately copying the
1773 # branch changes would lose those trunk mods forever. So in this
1774 # case, we need to do at least that copy in its own revision. And
1775 # for simplicity's sake, if we're creating the new revision for
1776 # even one file, then we just do all such copies together in the
1777 # new revision.
1779 # Doesn't that sound nice?
1781 # Unfortunately, Subversion doesn't support copies with sources
1782 # in the current txn. All copies must be based in committed
1783 # revisions. Therefore, we generate the above-described new
1784 # revision unconditionally.
1786 # This is a list of c_revs, and a c_rev is appended for each
1787 # default branch commit that will need to be copied to trunk (or
1788 # deleted from trunk) in some generated revision following the
1789 # "regular" revision.
1790 self.default_branch_cvs_revisions = [ ]
1792 def __cmp__(self, other):
1793 # Commits should be sorted by t_max. If both self and other have
1794 # the same t_max, break the tie using t_min, and lastly, digest
1795 return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
1796 or cmp(self.digest, other.digest))
1798 def has_file(self, fname):
1799 return self.files.has_key(fname)
1801 def revisions(self):
1802 return self.changes + self.deletes
1804 def opens_symbolic_name(self, name):
1805 """Returns true if any CVSRevision in this commit is on a tag or a
1806 branch or is the origin of a tag or branch."""
1807 for c_rev in self.revisions():
1808 if c_rev.opens_symbolic_name(name):
1809 return 1
1810 return 0
1812 def add_revision(self, c_rev):
1813 # Record the time range of this commit.
1815 # ### ISSUE: It's possible, though unlikely, that the time range
1816 # of a commit could get gradually expanded to be arbitrarily
1817 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
1818 # problem, and anyway deciding where to break it up would be a
1819 # judgement call. For now, we just print a warning in commit() if
1820 # this happens.
1821 if c_rev.timestamp < self.t_min:
1822 self.t_min = c_rev.timestamp
1823 if c_rev.timestamp > self.t_max:
1824 self.t_max = c_rev.timestamp
1826 if c_rev.op == OP_DELETE:
1827 self.deletes.append(c_rev)
1828 else:
1829 # OP_CHANGE or OP_ADD
1830 self.changes.append(c_rev)
1832 self.files[c_rev.fname] = 1
1834 def _pre_commit(self):
1835 """Generates any SVNCommits that must exist before the main
1836 commit."""
1838 # There may be multiple c_revs in this commit that would cause
1839 # branch B to be filled, but we only want to fill B once. On the
1840 # other hand, there might be multiple branches committed on in
1841 # this commit. Whatever the case, we should count exactly one
1842 # commit per branch, because we only fill a branch once per
1843 # CVSCommit. This list tracks which branches we've already
1844 # counted.
1845 accounted_for_sym_names = [ ]
1847 def fill_needed(c_rev, pm):
1848 """Return 1 if this is the first commit on a new branch (for
1849 this file) and we need to fill the branch; else return 0
1850 (meaning that some other file's first commit on the branch has
1851 already done the fill for us).
1853 If C_REV.op is OP_ADD, only return 1 if the branch that this
1854 commit is on has no last filled revision.
1856 PM is a PersistenceManager to query.
1859 # Different '.' counts indicate that c_rev is now on a different
1860 # line of development (and may need a fill)
1861 if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
1862 svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
1863 # It should be the case that when we have a file F that
1864 # is added on branch B (thus, F on trunk is in state
1865 # 'dead'), we generate an SVNCommit to fill B iff the branch
1866 # has never been filled before.
1868 # If this c_rev.op == OP_ADD, *and* the branch has never
1869 # been filled before, then fill it now. Otherwise, no need to
1870 # fill it.
1871 if c_rev.op == OP_ADD:
1872 if pm.last_filled.get(c_rev.branch_name, None) is None:
1873 return 1
1874 else:
1875 if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
1876 return 1
1877 return 0
1879 for c_rev in self.changes + self.deletes:
1880 # If a commit is on a branch, we must ensure that the branch
1881 # path being committed exists (in HEAD of the Subversion
1882 # repository). If it doesn't exist, we will need to fill the
1883 # branch. After the fill, the path on which we're committing
1884 # will exist.
1885 if c_rev.branch_name \
1886 and c_rev.branch_name not in accounted_for_sym_names \
1887 and c_rev.branch_name not in self.done_symbols \
1888 and fill_needed(c_rev, Ctx()._persistence_manager):
1889 svn_commit = SVNCommit("pre-commit symbolic name '%s'"
1890 % c_rev.branch_name)
1891 svn_commit.set_symbolic_name(c_rev.branch_name)
1892 self.secondary_commits.append(svn_commit)
1893 accounted_for_sym_names.append(c_rev.branch_name)
1895 def _commit(self):
1896 """Generates the primary SVNCommit that corresponds the this
1897 CVSCommit."""
1898 # Generate an SVNCommit unconditionally. Even if the only change
1899 # in this CVSCommit is a deletion of an already-deleted file (that
1900 # is, a CVS revision in state 'dead' whose predecessor was also in
1901 # state 'dead'), the conversion will still generate a Subversion
1902 # revision containing the log message for the second dead
1903 # revision, because we don't want to lose that information.
1904 svn_commit = SVNCommit("commit")
1905 self.motivating_commit = svn_commit
1907 for c_rev in self.changes:
1908 svn_commit.add_revision(c_rev)
1909 # Only make a change if we need to. When 1.1.1.1 has an empty
1910 # deltatext, the explanation is almost always that we're looking
1911 # at an imported file whose 1.1 and 1.1.1.1 are identical. On
1912 # such imports, CVS creates an RCS file where 1.1 has the
1913 # content, and 1.1.1.1 has an empty deltatext, i.e, the same
1914 # content as 1.1. There's no reason to reflect this non-change
1915 # in the repository, so we want to do nothing in this case. (If
1916 # we were really paranoid, we could make sure 1.1's log message
1917 # is the CVS-generated "Initial revision\n", but I think the
1918 # conditions below are strict enough.)
1919 if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
1920 and (c_rev.rev == "1.1.1.1")):
1921 if c_rev.is_default_branch_revision():
1922 self.default_branch_cvs_revisions.append(c_rev)
1924 for c_rev in self.deletes:
1925 # When a file is added on a branch, CVS not only adds the file
1926 # on the branch, but generates a trunk revision (typically
1927 # 1.1) for that file in state 'dead'. We only want to add
1928 # this revision if the log message is not the standard cvs
1929 # fabricated log message.
1930 if c_rev.prev_rev is None:
1931 # c_rev.branches may be empty if the originating branch
1932 # has been excluded.
1933 if not c_rev.branches:
1934 continue
1935 cvs_generated_msg = ('file %s was initially added on branch %s.\n'
1936 % (c_rev.filename(),
1937 c_rev.branches[0]))
1938 author, log_msg = \
1939 Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
1940 if log_msg == cvs_generated_msg:
1941 continue
1943 svn_commit.add_revision(c_rev)
1944 if c_rev.is_default_branch_revision():
1945 self.default_branch_cvs_revisions.append(c_rev)
1947 # There is a slight chance that we didn't actually register any
1948 # CVSRevisions with our SVNCommit (see loop over self.deletes
1949 # above), so if we have no CVSRevisions, we don't flush the
1950 # svn_commit to disk and roll back our revnum.
1951 if len(svn_commit.cvs_revs) > 0:
1952 svn_commit.flush()
1953 else:
1954 # We will not be flushing this SVNCommit, so rollback the
1955 # SVNCommit revision counter.
1956 SVNCommit.revnum = SVNCommit.revnum - 1
1958 if not Ctx().trunk_only:
1959 for c_rev in self.revisions():
1960 Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
1962 def _post_commit(self):
1963 """Generates any SVNCommits that we can perform now that _commit
1964 has happened. That is, handle non-trunk default branches.
1965 Sometimes an RCS file has a non-trunk default branch, so a commit
1966 on that default branch would be visible in a default CVS checkout
1967 of HEAD. If we don't copy that commit over to Subversion's trunk,
1968 then there will be no Subversion tree which corresponds to that
1969 CVS checkout. Of course, in order to copy the path over, we may
1970 first need to delete the existing trunk there. """
1972 # Only generate a commit if we have default branch revs
1973 if len(self.default_branch_cvs_revisions):
1974 # Generate an SVNCommit for all of our default branch c_revs.
1975 svn_commit = SVNCommit("post-commit default branch(es)")
1976 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
1977 for c_rev in self.default_branch_cvs_revisions:
1978 svn_commit.add_revision(c_rev)
1979 Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
1980 svn_commit.revnum)
1981 self.secondary_commits.append(svn_commit)
1983 def process_revisions(self, done_symbols):
1984 """Process all the CVSRevisions that this instance has, creating
1985 one or more SVNCommits in the process. Generate fill SVNCommits
1986 only for symbols not in DONE_SYMBOLS (avoids unnecessary
1987 fills).
1989 Return the primary SVNCommit that corresponds to this CVSCommit.
1990 The returned SVNCommit is the commit that motivated any other
1991 SVNCommits generated in this CVSCommit."""
1992 self.done_symbols = done_symbols
1993 seconds = self.t_max - self.t_min + 1
1995 Log().write(LOG_VERBOSE, '-' * 60)
1996 Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
1997 if seconds == 1:
1998 Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
1999 % time.ctime(self.t_max))
2000 else:
2001 Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
2002 Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
2003 % (time.ctime(self.t_max), seconds))
2005 if seconds > COMMIT_THRESHOLD + 1:
2006 Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
2007 % (warning_prefix, COMMIT_THRESHOLD))
2009 if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
2010 self._commit()
2011 return self.motivating_commit
2013 self._pre_commit()
2014 self._commit()
2015 self._post_commit()
2017 for svn_commit in self.secondary_commits:
2018 svn_commit.set_date(self.motivating_commit.get_date())
2019 svn_commit.flush()
2021 return self.motivating_commit
class SVNCommit:
  """This represents one commit to the Subversion Repository.  There
  are three types of SVNCommits:

  1. Commits one or more CVSRevisions (cannot fill a symbolic name).

  2. Creates or fills a symbolic name (cannot commit CVSRevisions).

  3. Updates trunk to reflect the contents of a particular branch
     (this is to handle RCS default branches)."""

  # The revision number to assign to the next new SVNCommit.
  # We start at 2 because SVNRepositoryMirror uses the first commit
  # to create trunk, tags, and branches.
  revnum = 2

  class SVNCommitInternalInconsistencyError(Exception):
    """Exception raised if we encounter an impossible state in the
    SVNCommit Databases."""
    pass

  def __init__(self, description="", revnum=None, cvs_revs=None):
    """Instantiate an SVNCommit.  DESCRIPTION is for debugging only.
    If REVNUM, the SVNCommit will correspond to that revision number;
    and if CVS_REVS, then they must be the exact set of CVSRevisions
    for REVNUM.

    If REVNUM is not given (or is false), the next number is taken
    from the class-wide counter SVNCommit.revnum, which is then
    incremented.

    It is an error to pass CVS_REVS without REVNUM, but you may pass
    REVNUM without CVS_REVS, and then add a revision at a time by
    invoking add_revision()."""
    self._description = description

    # Revprop metadata for this commit.
    #
    # These initial values are placeholders.  At least the log and the
    # date should be different by the time these are used.
    #
    # They are private because their values should be returned encoded
    # in UTF8, but callers aren't required to set them in UTF8.
    # Therefore, accessor methods are used to set them, and
    # self.get_revprops() is used to get them, in dictionary form.
    self._author = Ctx().username
    self._log_msg = "This log message means an SVNCommit was used too soon."
    self._max_date = 0 # Latest date seen so far.

    self.cvs_revs = cvs_revs or []
    if revnum:
      self.revnum = revnum
    else:
      self.revnum = SVNCommit.revnum
      SVNCommit.revnum = SVNCommit.revnum + 1

    # The symbolic name that is filled in this SVNCommit, if any
    self.symbolic_name = None

    # If this commit is a default branch synchronization, this
    # variable represents the subversion revision number of the
    # *primary* commit where the default branch changes actually
    # happened.  It is None otherwise.
    #
    # It is possible for multiple synchronization commits to refer to
    # the same motivating commit revision number, and it is possible
    # for a single synchronization commit to contain CVSRevisions on
    # multiple different default branches.
    self.motivating_revnum = None

    # is_tag is true only if this commit is a fill of a symbolic name
    # that is a tag, None in all other cases.
    self.is_tag = None

  def set_symbolic_name(self, name):
    "Set self.symbolic_name to NAME, after cleaning NAME."
    self.symbolic_name = _clean_symbolic_name(name)

  def set_motivating_revnum(self, revnum):
    "Set self.motivating_revnum to REVNUM."
    self.motivating_revnum = revnum

  def set_author(self, author):
    """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
    This is the only way to set an SVNCommit's author."""
    self._author = author

  def set_log_msg(self, msg):
    """Set this SVNCommit's log message to MSG (a locally-encoded string).
    This is the only way to set an SVNCommit's log message."""
    self._log_msg = msg

  def set_date(self, date):
    """Set this SVNCommit's date to DATE (an integer).
    Note that self.add_revision() updates this automatically based on
    a CVSRevision; so you may not need to call this at all, and even
    if you do, the value may be overwritten by a later call to
    self.add_revision()."""
    self._max_date = date

  def get_date(self):
    """Returns this SVNCommit's date as an integer."""
    return self._max_date

  def get_revprops(self):
    """Return the Subversion revprops for this SVNCommit, as a
    dictionary with the keys 'svn:author', 'svn:log' and 'svn:date'.

    Author and log message are converted from Ctx().encoding to UTF8.
    If that conversion raises UnicodeError, a warning is logged and
    the unconverted values are returned instead."""
    date = format_date(self._max_date)
    try:
      ### FIXME: The 'replace' behavior should be an option, like
      ### --encoding is.
      utf8_author = None
      if self._author is not None:
        unicode_author = unicode(self._author, Ctx().encoding, 'replace')
        utf8_author = unicode_author.encode('utf8')
      unicode_log = unicode(self.get_log_msg(), Ctx().encoding, 'replace')
      utf8_log = unicode_log.encode('utf8')
      return { 'svn:author' : utf8_author,
               'svn:log' : utf8_log,
               'svn:date' : date }
    except UnicodeError:
      Log().write(LOG_WARN, '%s: problem encoding author or log message:'
                  % warning_prefix)
      Log().write(LOG_WARN, " author: '%s'" % self._author)
      Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
      Log().write(LOG_WARN, " date: '%s'" % date)
      Log().write(LOG_WARN, "(subversion rev %s) Related files:" % self.revnum)
      for c_rev in self.cvs_revs:
        Log().write(LOG_WARN, " ", c_rev.fname)

      Log().write(LOG_WARN, "Consider rerunning with (for example)",
                  "'--encoding=latin1'.\n")
      # It's better to fall back to the original (unknown encoding) data
      # than to either 1) quit or 2) record nothing at all.
      return { 'svn:author' : self._author,
               'svn:log' : self.get_log_msg(),
               'svn:date' : date }

  def add_revision(self, cvs_rev):
    """Append CVS_REV to self.cvs_revs, and move this commit's date
    forward to CVS_REV's timestamp if that is later."""
    self.cvs_revs.append(cvs_rev)
    if cvs_rev.timestamp > self._max_date:
      self._max_date = cvs_rev.timestamp

  def _is_primary_commit(self):
    """Return true if this is a primary SVNCommit, false otherwise.
    A primary commit has neither a symbolic name nor a motivating
    revision number."""
    return not (self.symbolic_name or self.motivating_revnum)

  def flush(self):
    """Hand this SVNCommit's data to the PersistenceManager: always
    its CVSRevisions, its motivating revision number if it has one,
    and, for a non-primary commit, its symbolic name and date."""
    Log().write(LOG_NORMAL, "Creating Subversion commit %d (%s)"
                % (self.revnum, self._description))
    Ctx()._persistence_manager.set_cvs_revs(self.revnum, self.cvs_revs)

    if self.motivating_revnum is not None:
      Ctx()._persistence_manager.set_motivating_revnum(self.revnum,
                                                       self.motivating_revnum)

    # If we're not a primary commit, then store our date and/or our
    # symbolic_name
    if not self._is_primary_commit():
      Ctx()._persistence_manager.set_name_and_date(self.revnum,
                                                   self.symbolic_name,
                                                   self._max_date)

  def __str__(self):
    """ Print a human-readable description of this SVNCommit.  This
    description is not intended to be machine-parseable (although
    we're not going to stop you if you try!)"""
    # Collect the pieces and join them once, rather than growing the
    # string piece by piece.
    pieces = ["SVNCommit #: " + str(self.revnum) + "\n"]
    if self.symbolic_name:
      pieces.append(" symbolic name: " + self.symbolic_name + "\n")
    else:
      pieces.append(" NO symbolic name\n")
    pieces.append(" debug description: " + self._description + "\n")
    pieces.append(" cvs_revs:\n")
    for c_rev in self.cvs_revs:
      pieces.append(" " + c_rev.unique_key() + "\n")
    return "".join(pieces)

  def get_log_msg(self):
    """Returns the actual log message for a primary commit, and the
    appropriate manufactured log message for a secondary commit."""
    if self.symbolic_name is not None:
      return self._log_msg_for_symbolic_name_commit()
    elif self.motivating_revnum is not None:
      return self._log_msg_for_default_branch_commit()
    else:
      return self._log_msg

  def _log_msg_for_symbolic_name_commit(self):
    """Creates a log message for a manufactured commit that fills
    self.symbolic_name.  If self.is_tag is true, write the log message
    as though for a tag, else write it as though for a branch."""
    # Deliberately not called 'type', which would hide the builtin.
    kind = 'branch'
    if self.is_tag:
      kind = 'tag'

    # In Python 2.2.3, we could use textwrap.fill().  Oh well :-).
    # Names of 13 or more characters are moved to a line of their own.
    space_or_newline = ' '
    if len(self.symbolic_name) >= 13:
      space_or_newline = '\n'

    return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
           % (kind, space_or_newline, self.symbolic_name)

  def _log_msg_for_default_branch_commit(self):
    """Creates a log message for a manufactured commit that
    synchronizes a non-trunk default branch with trunk."""
    msg = 'This commit was generated by cvs2svn to compensate for ' \
          'changes in r%d,\n' \
          'which included commits to RCS files with non-trunk default ' \
          'branches.\n' % self.motivating_revnum
    return msg
class CVSRevisionAggregator:
  """This class groups CVSRevisions into CVSCommits that represent
  at least one SVNCommit."""

  def __init__(self):
    """Open the databases the aggregator reads and install the
    SymbolingsLogger, the PersistenceManager (in DB_OPEN_NEW mode) and
    the default branches database on Ctx()."""
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), DB_OPEN_READ)

    # digest key -> CVSCommit still open to further revisions.  A
    # commit that has been closed to further changes but not yet
    # processed stays here under its digest with '-' characters
    # appended.
    self.cvs_commits = {}

    # symbol -> None, for symbols whose last source CVSRevision has
    # been seen but whose closing SVNCommit is still to be made.
    self.pending_symbols = {}

    # A list of symbols for which we've already encountered the last
    # CVSRevision that is a source for that symbol.  That is, the
    # final fill for this symbol has been done, and we never need to
    # fill it again.
    self.done_symbols = [ ]

    # This variable holds the most recently created primary svn_commit
    # object.  CVSRevisionAggregator maintains this variable merely
    # for its date, so that it can set dates for the SVNCommits
    # created in self.attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = Database(temp(DEFAULT_BRANCHES_DB),
                                          DB_OPEN_READ)

  def process_revision(self, c_rev):
    """Add C_REV to the pending CVSCommit for its digest (creating
    one if necessary), after first processing every pending CVSCommit
    that is too old for C_REV to belong to."""
    # Each time we read a new line, we scan the commits we've
    # accumulated so far to see if any are ready for processing now.
    ready_queue = [ ]
    for digest_key, cvs_commit in self.cvs_commits.items():
      if cvs_commit.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
        ready_queue.append(cvs_commit)
        del self.cvs_commits[digest_key]
        continue
      # If the inbound commit is on the same file as a pending commit,
      # close the pending commit to further changes.  Don't flush it
      # though, as there may be other pending commits dated before
      # this one.
      # ### ISSUE: the has_file() check below is not optimal.
      # It does fix the dataloss bug where revisions would get lost
      # if checked in too quickly, but it can also break apart the
      # commits.  The correct fix would require tracking the
      # dependencies between change sets and committing them in
      # proper order.
      if cvs_commit.has_file(c_rev.fname):
        unused_id = digest_key + '-'
        # Find a string that is not already a key in the
        # self.cvs_commits dict
        while self.cvs_commits.has_key(unused_id):
          unused_id = unused_id + '-'
        self.cvs_commits[unused_id] = cvs_commit
        del self.cvs_commits[digest_key]

    # Add this item into the set of still-available commits.
    if self.cvs_commits.has_key(c_rev.digest):
      cvs_commit = self.cvs_commits[c_rev.digest]
    else:
      author, log = self.metadata_db[c_rev.digest]
      cvs_commit = CVSCommit(c_rev.digest, author, log)
      self.cvs_commits[c_rev.digest] = cvs_commit
    cvs_commit.add_revision(c_rev)

    # If there are any elements in the ready_queue at this point, they
    # need to be processed, because this latest rev couldn't possibly
    # be part of any of them.  Sort them into time-order, then process
    # 'em.
    ready_queue.sort()

    # Make sure we attempt_to_commit_symbols for this c_rev, even if no
    # commits are ready.
    if not ready_queue:
      self.attempt_to_commit_symbols(ready_queue, c_rev)

    # Iterate over a copy: each processed commit is taken out of
    # ready_queue so that attempt_to_commit_symbols() only sees the
    # commits that are still waiting.
    for cvs_commit in ready_queue[:]:
      self.latest_primary_svn_commit \
        = cvs_commit.process_revisions(self.done_symbols)
      ready_queue.remove(cvs_commit)
      self.attempt_to_commit_symbols(ready_queue, c_rev)

  def flush(self):
    """Commit anything left in self.cvs_commits.  Then inform the
    SymbolingsLogger that all commits are done."""

    ready_queue = [ ]
    for k, v in self.cvs_commits.items():
      ready_queue.append((v, k))

    ready_queue.sort()
    for cvs_commit, digest_key in ready_queue:
      self.latest_primary_svn_commit = \
        cvs_commit.process_revisions(self.done_symbols)
      # Drop the commit from self.cvs_commits before looking at the
      # symbols, so that it no longer counts as pending there.
      del self.cvs_commits[digest_key]
      self.attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
    """
    This function generates 1 SVNCommit for each symbol in
    self.pending_symbols that doesn't have an opening CVSRevision in
    either QUEUED_COMMITS or self.cvs_commits.values().

    If C_REV is not None, then we first add to self.pending_symbols
    any symbols from C_REV that C_REV is the last CVSRevision for.
    """
    # If we're not doing a trunk-only conversion, get the symbolic
    # names that this c_rev is the last *source* CVSRevision for and
    # add them to those left over from previous passes through the
    # aggregator.
    if c_rev and not Ctx().trunk_only:
      for sym in self.last_revs_db.get(c_rev.unique_key(), []):
        self.pending_symbols[sym] = None

    # Make a list of all symbols that still have *source* CVSRevisions
    # in the pending commit queue (self.cvs_commits).
    open_symbols = {}
    if self.pending_symbols:
      # The set of commits to inspect is the same for every symbol,
      # so build it once.
      candidates = self.cvs_commits.values() + queued_commits
      for sym in self.pending_symbols.keys():
        for cvs_commit in candidates:
          if cvs_commit.opens_symbolic_name(sym):
            open_symbols[sym] = None
            break

    # Sort the pending symbols so that we will always process the
    # symbols in the same order, regardless of the order in which the
    # dict hashing algorithm hands them back to us.  We do this so
    # that our tests will get the same results on all platforms.
    sorted_pending_symbols_keys = self.pending_symbols.keys()
    sorted_pending_symbols_keys.sort()
    for sym in sorted_pending_symbols_keys:
      if open_symbols.has_key(sym): # sym is still open--don't close it.
        continue
      svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
      svn_commit.set_symbolic_name(sym)
      # NOTE(review): this relies on a primary commit having been
      # processed already; self.latest_primary_svn_commit starts out
      # as None.
      svn_commit.set_date(self.latest_primary_svn_commit.get_date())
      svn_commit.flush()
      self.done_symbols.append(sym)
      del self.pending_symbols[sym]
class SymbolingsReader:
  """Provides an interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file
  and the SYMBOL_OFFSETS_DB.  Does the heavy lifting of finding and
  returning the correct opening and closing Subversion revision
  numbers for a given symbolic name."""

  def __init__(self):
    """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and
    reads the offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    # The offsets_db is really small, and we need to read and write
    # from it a fair bit, so suck it into memory
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    # symbolic name -> offset in self.symbolings at which the next
    # read for that name should start.
    self.offsets = { }
    for key in offsets_db.db.keys():
      self.offsets[key] = offsets_db[key]

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Given SYMBOLIC_NAME and SVN_REVNUM, return a new
    SymbolicNameFillingGuide object.

    The guide is given every opening and closing recorded for
    SYMBOLIC_NAME, starting at the remembered offset for that name and
    stopping at the first line that is empty, belongs to another name,
    or has a revision number greater than SVN_REVNUM.

    Note that if we encounter an opening rev in this fill, but the
    corresponding closing rev takes place later than SVN_REVNUM, the
    closing will not be passed to SymbolicNameFillingGuide in this
    fill (and will be discarded when encountered in a later fill).
    This is perfectly fine, because we can still do a valid fill
    without the closing--we always try to fill what we can as soon as
    we can."""
    # It's possible to have a branch start with a file that was added
    # on a branch
    if not self.offsets.has_key(symbolic_name):
      return SymbolicNameFillingGuide(symbolic_name)
    # set our read offset for self.symbolings to the offset for
    # symbolic_name
    self.symbolings.seek(self.offsets[symbolic_name])

    symbol_fill = SymbolicNameFillingGuide(symbolic_name)
    while 1:
      # Remember where this line starts, so that the line which ends
      # the loop can be read again by the next fill.
      fpos = self.symbolings.tell()
      line = self.symbolings.readline().rstrip()
      if not line:
        break
      # 'kind' is the third field of the line; it is deliberately not
      # called 'type', which would hide the builtin.
      name, revnum, kind, svn_path = line.split(" ", 3)
      revnum = int(revnum)
      if (revnum > svn_revnum
          or name != symbolic_name):
        break
      symbol_fill.register(svn_path, revnum, kind)

    # get current offset of the read marker and set it to the offset
    # for the beginning of the line we just read if we used anything
    # we read.
    if not symbol_fill.is_empty():
      self.offsets[symbolic_name] = fpos

    symbol_fill.make_node_tree()
    return symbol_fill
class SymbolicNameFillingGuide:
  """A SymbolicNameFillingGuide is essentially a node tree
  representing the source paths to be copied to fill
  self.name in the current SVNCommit.

  After calling self.register() on a series of openings and closings,
  call self.make_node_tree() to prepare self.node_tree for
  examination.  See the docstring for self.make_node_tree() for
  details on the structure of self.node_tree.

  By walking self.node_tree and calling self.get_best_revnum() on each
  node, the caller can determine what subversion revision number to
  copy the path corresponding to that node from.  self.node_tree
  should be treated as read-only.

  The caller can then descend to sub-nodes to see if their "best
  revnum" differs from their parents' and if it does, take appropriate
  actions to "patch up" the subtrees."""

  def __init__(self, symbolic_name):
    """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
    prepares it for receiving openings and closings."""
    self.name = symbolic_name

    # Flag keys used inside leaf nodes.  They start with '/' so that
    # they can never collide with a path component.
    self.opening_key = "/o"
    self.closing_key = "/c"

    # A dictionary of SVN_PATHS and SVN_REVNUMS whose format is:
    #
    # { svn_path : { self.opening_key : svn_revnum,
    #                self.closing_key : svn_revnum }
    #   ...}
    self.things = { }

    # The key for the root node of the node tree
    self.root_key = '0'
    # The dictionary that holds our node tree, seeded with the root key.
    self.node_tree = { self.root_key : { } }

  def get_best_revnum(self, node, preferred_revnum):
    """Determine the best subversion revision number to use when
    copying the source tree beginning at NODE (a key in
    self.node_tree).

    PREFERRED_REVNUM is passed to self._best_rev and used to
    calculate the best revnum.

    Returns a tuple (REVNUM, MAX_SCORE).  If no revision can be found,
    an error is written to stderr and the program exits with status 1."""
    # Aggregate openings and closings from the rev tree
    openings = self._list_revnums_for_key(node, self.opening_key)
    closings = self._list_revnums_for_key(node, self.closing_key)

    # Score the lists
    scores = self._score_revisions(self._sum_revnum_counts(openings),
                                   self._sum_revnum_counts(closings))

    revnum, max_score = self._best_rev(scores, preferred_revnum)

    if revnum == SVN_INVALID_REVNUM:
      # Report the symbolic name being filled (self.name).
      sys.stderr.write(error_prefix + ": failed to find a revision "
                       + "to copy from when copying %s\n" % self.name)
      sys.exit(1)
    return revnum, max_score

  def _best_rev(self, scores, preferred_rev):
    """Return a tuple (REV, MAX_SCORE) naming the revision with the
    highest score from SCORES, a list returned by _score_revisions().
    When the maximum score is shared by multiple revisions, the oldest
    revision is selected, unless PREFERRED_REV is one of the
    possibilities, in which case, it is selected."""
    max_score = 0
    preferred_rev_score = -1
    rev = SVN_INVALID_REVNUM
    if preferred_rev is None:
      # Comparison order of different types is arbitrary.  Do not
      # expect None to compare less than int values below.
      # In Python 2.3 None compares with ints like negative infinity.
      # In Python 2.0 None compares with ints like positive infinity.
      preferred_rev = SVN_INVALID_REVNUM
    for revnum, count in scores:
      # Strict '>' keeps the oldest revision when scores tie.
      if count > max_score:
        max_score = count
        rev = revnum
      # SCORES is in ascending revision order, so this ends up holding
      # the score of the last entry at or before PREFERRED_REV.
      if revnum <= preferred_rev:
        preferred_rev_score = count
    if preferred_rev_score == max_score:
      rev = preferred_rev
    return rev, max_score

  def _score_revisions(self, openings, closings):
    """Return a list of revisions and scores based on OPENINGS and
    CLOSINGS.  The returned list looks like:

       [(REV1 SCORE1), (REV2 SCORE2), ...]

    where REV2 > REV1.  OPENINGS and CLOSINGS are lists of
    (revnum, count) tuples as returned by _sum_revnum_counts() for the
    self.opening_key and self.closing_key values of some file or
    directory node; CLOSINGS may also be None.

    Each score indicates that copying the corresponding revision (or
    any following revision up to the next revision in the list) of the
    object in question would yield that many correct paths at or
    underneath the object.  There may be other paths underneath it
    which are not correct and would need to be deleted or recopied;
    those can only be detected by descending and examining their
    scores.

    If OPENINGS is false, return the empty list."""
    # First look for easy outs.
    if not openings:
      return []

    # Must be able to iterate over closings below.
    if closings is None:
      closings = []

    # No easy out, so wish for lexical closures and calculate the scores :-).
    # Each opening adds its count to the running total of correct paths.
    scores = []
    opening_score_accum = 0
    for opening_rev, opening_score in openings:
      opening_score_accum = opening_score_accum + opening_score
      scores.append((opening_rev, opening_score_accum))

    # Index of the first entry in SCORES that can still be affected by
    # the closings left to process.
    start = 0
    for closing_rev, closing_score in closings:
      done_exact_rev = None
      insert_index = None
      insert_score = None
      for j in range(start, len(scores)):
        score_rev, score = scores[j]
        if score_rev >= closing_rev:
          if not done_exact_rev:
            if score_rev > closing_rev:
              # No entry exists for CLOSING_REV itself; remember where
              # one has to be inserted once this scan is finished.
              insert_index = j
              insert_score = scores[j-1][1] - closing_score
            done_exact_rev = 1
          # Every revision at or after the closing loses its paths.
          scores[j] = (score_rev, score - closing_score)
        else:
          start = j + 1
      if not done_exact_rev:
        # The closing is later than every revision scored so far.
        scores.append((closing_rev,scores[-1][1] - closing_score))
      if insert_index is not None:
        scores.insert(insert_index, (closing_rev, insert_score))
    return scores

  def _sum_revnum_counts(self, rev_list):
    """Takes an array of revisions (REV_LIST), for example:

      [21, 18, 6, 49, 39, 24, 24, 24, 24, 24, 24, 24]

    and adds up every occurrence of each revision and returns a sorted
    array of tuples containing (svn_revnum, count):

      [(6, 1), (18, 1), (21, 1), (24, 7), (39, 1), (49, 1)]
    """
    s = {}
    for k in rev_list: # Add up the scores
      if s.has_key(k):
        s[k] = s[k] + 1
      else:
        s[k] = 1
    a = s.items()
    a.sort()
    return a

  def _list_revnums_for_key(self, node, revnum_type_key):
    """Scan self.node_tree and return a list of all the revision
    numbers (including duplicates) contained in REVNUM_TYPE_KEY values
    for all leaf nodes at and under NODE.

    REVNUM_TYPE_KEY should be either self.opening_key or
    self.closing_key."""
    revnums = []

    # If the node has self.opening_key, it must be a leaf node--all
    # leaf nodes have at least an opening key (although they may not
    # have a closing key).  Fetch revnum and return.
    if (self.node_tree[node].has_key(self.opening_key) and
        self.node_tree[node].has_key(revnum_type_key)):
      revnums.append(self.node_tree[node][revnum_type_key])
      return revnums

    for key, node_contents in self.node_tree[node].items():
      # Entries whose name starts with '/' are flags, not children.
      if key[0] == '/':
        continue
      revnums = revnums + \
                self._list_revnums_for_key(node_contents, revnum_type_key)
    return revnums

  def register(self, svn_path, svn_revnum, type):
    """Collects opening and closing revisions for this
    SymbolicNameFillingGuide.  SVN_PATH is the source path that needs
    to be copied into self.name, and SVN_REVNUM is either the
    first svn revision number that we can copy from (our opening), or
    the last (not inclusive) svn revision number that we can copy from
    (our closing).  TYPE indicates whether this path is an opening or
    a closing.

    The opening for a given SVN_PATH must be passed before the closing
    for it to have any effect... any closing encountered before a
    corresponding opening will be discarded.

    It is not necessary to pass a corresponding closing for every
    opening.
    """
    # Always log an OPENING
    if type == OPENING:
      self.things[svn_path] = {self.opening_key: svn_revnum}
    # Only log a closing if we've already registered the opening for that path.
    elif type == CLOSING and self.things.has_key(svn_path):
      # When we have a non-trunk default branch, we may have multiple
      # closings--only register the first closing we encounter.
      if not self.things[svn_path].has_key(self.closing_key):
        self.things[svn_path][self.closing_key] = svn_revnum

  def make_node_tree(self):
    """Generates the SymbolicNameFillingGuide's node tree from
    self.things.  Each leaf node maps self.opening_key to the earliest
    subversion revision from which this node/path may be copied; and
    optionally map self.closing_key to the subversion revision one
    higher than the last revision from which this node/path may be
    copied.  Intermediate nodes never contain opening or closing
    flags."""
    for svn_path, open_close in self.things.items():
      parent_key = self.root_key

      # Walk down the path, one node at a time.
      components = svn_path.split('/')
      last_index = len(components) - 1
      for i in range(len(components)):
        component = components[i]

        if not self.node_tree[parent_key].has_key(component):
          child_key = gen_key()
          self.node_tree[child_key] = { }
          self.node_tree[parent_key][component] = child_key
        else:
          child_key = self.node_tree[parent_key][component]

        # If this is the leaf, add the openings and closings.  The
        # leaf is identified by position: comparing the component
        # string against the last one (by identity or by value) would
        # misfire when an earlier component has the same name.
        if i == last_index:
          self.node_tree[child_key] = open_close
        parent_key = child_key

  def is_empty(self):
    """Return true if we haven't accumulated any openings or closings,
    false otherwise."""
    return not len(self.things)
class FillSource:
  """A candidate source for a symbol fill, as used by the symbol
  filler in SVNRepositoryMirror.  It pairs a source prefix and a key
  with a score and revision number that are assigned later."""

  def __init__(self, prefix, key):
    """Create a fill source for PREFIX and KEY that has neither a
    score nor a revision number yet."""
    self.prefix, self.key = prefix, key
    self.score = self.revnum = None

  def set_score(self, score, revnum):
    """Record SCORE and REVNUM on this fill source."""
    self.score, self.revnum = score, revnum

  def __cmp__(self, other):
    """Order FillSources so that sorting yields descending scores.
    Raises TypeError if either side has not been scored."""
    for candidate in (self, other):
      if candidate.score is None:
        raise TypeError('Tried to compare unscored FillSource')
    # The operands are swapped on purpose: a higher score sorts first.
    return cmp(other.score, self.score)
2717 class SVNRepositoryMirror:
2718 """Mirror a Subversion Repository as it is constructed, one
2719 SVNCommit at a time. The mirror is skeletal; it does not contain
2720 file contents. The creation of a dumpfile or Subversion repository
2721 is handled by delegates. See self.add_delegate method for how to
2722 set delegates.
2724 The structure of the repository is kept in two databases and one
2725 hash. The revs_db database maps revisions to root node keys, and
2726 the nodes_db database maps node keys to nodes. A node is a hash
2727 from directory names to keys. Both the revs_db and the nodes_db are
2728 stored on disk and each access is expensive.
2730 The nodes_db database only has the keys for old revisions. The
2731 revision that is being contructed is kept in memory in the new_nodes
2732 hash which is cheap to access.
2734 You must invoke _start_commit between SVNCommits.
2736 *** WARNING *** All path arguments to methods in this class CANNOT
2737 have leading or trailing slashes.
2740 class SVNRepositoryMirrorPathExistsError(Exception):
2741 """Exception raised if an attempt is made to add a path to the
2742 repository mirror and that path already exists in the youngest
2743 revision of the repository."""
2744 pass
2746 class SVNRepositoryMirrorUnexpectedOperationError(Exception):
2747 """Exception raised if a CVSRevision is found to have an unexpected
2748 operation (OP) value."""
2749 pass
2751 class SVNRepositoryMirrorInvalidFillOperationError(Exception):
2752 """Exception raised if an empty SymbolicNameFillingGuide is returned
2753 during a fill where the branch in question already exists."""
2754 pass
2756 def __init__(self):
2757 """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
2758 self.delegates = [ ]
2760 # This corresponds to the 'revisions' table in a Subversion fs.
2761 self.revs_db = Database(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
2762 Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)
2764 # This corresponds to the 'nodes' table in a Subversion fs. (We
2765 # don't need a 'representations' or 'strings' table because we
2766 # only track metadata, not file contents.)
2767 self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
2768 Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)
2770 # Start at revision 0 without a root node. It will be created
2771 # by _open_writable_root_node.
2772 self.youngest = 0
2773 self.new_root_key = None
2774 self.new_nodes = { }
2776 if not Ctx().trunk_only:
2777 ###PERF IMPT: Suck this into memory.
2778 self.tags_db = TagsDatabase(DB_OPEN_READ)
2779 self.symbolings_reader = SymbolingsReader()
2781 def _initialize_repository(self, date):
2782 """Initialize the repository by creating the directories for
2783 trunk, tags, and branches. This method should only be called
2784 after all delegates are added to the repository mirror."""
2785 # Make a 'fake' SVNCommit so we can take advantage of the revprops
2786 # magic therein
2787 svn_commit = SVNCommit("Initialization", 1)
2788 svn_commit.set_date(date)
2789 svn_commit.set_log_msg("New repository initialized by cvs2svn.")
2791 self._start_commit(svn_commit)
2792 self._mkdir(Ctx().trunk_base)
2793 if not Ctx().trunk_only:
2794 self._mkdir(Ctx().branches_base)
2795 self._mkdir(Ctx().tags_base)
2797 def _start_commit(self, svn_commit):
2798 """Start a new commit."""
2799 if self.youngest > 0:
2800 self._end_commit()
2802 self.youngest = svn_commit.revnum
2803 self.new_root_key = None
2804 self.new_nodes = { }
2806 self._invoke_delegates('start_commit', svn_commit)
2808 def _end_commit(self):
2809 """Called at the end of each commit. This method copies the newly
2810 created nodes to the on-disk nodes db."""
2811 if self.new_root_key is None:
2812 # No changes were made in this revision, so we make the root node
2813 # of the new revision be the same as the last one.
2814 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
2815 else:
2816 self.revs_db[str(self.youngest)] = self.new_root_key
2817 # Copy the new nodes to the nodes_db
2818 for key, value in self.new_nodes.items():
2819 self.nodes_db[key] = value
2821 def _get_node(self, key):
2822 """Returns the node contents for KEY which may refer to either
2823 self.nodes_db or self.new_nodes."""
2824 if self.new_nodes.has_key(key):
2825 return self.new_nodes[key]
2826 else:
2827 return self.nodes_db[key]
2829 def _open_readonly_node(self, path, revnum):
2830 """Open a readonly node for PATH at revision REVNUM. Returns the
2831 node key and node contents if the path exists, else (None, None)."""
2832 # Get the root key
2833 if revnum == self.youngest:
2834 if self.new_root_key is None:
2835 node_key = self.revs_db[str(self.youngest - 1)]
2836 else:
2837 node_key = self.new_root_key
2838 else:
2839 node_key = self.revs_db[str(revnum)]
2841 for component in path.split('/'):
2842 node_contents = self._get_node(node_key)
2843 if not node_contents.has_key(component):
2844 return None
2845 node_key = node_contents[component]
2847 return node_key
2849 def _open_writable_root_node(self):
2850 """Open a writable root node. The current root node is returned
2851 immeditely if it is already writable. If not, create a new one by
2852 copying the contents of the root node of the previous version."""
2853 if self.new_root_key is not None:
2854 return self.new_root_key, self.new_nodes[self.new_root_key]
2856 if self.youngest < 2:
2857 new_contents = { }
2858 else:
2859 new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
2860 self.new_root_key = gen_key()
2861 self.new_nodes = { self.new_root_key: new_contents }
2863 return self.new_root_key, new_contents
2865 def _open_writable_node(self, svn_path, create):
2866 """Open a writable node for the path SVN_PATH, creating SVN_PATH
2867 and any missing directories if CREATE is True."""
2868 parent_key, parent_contents = self._open_writable_root_node()
2870 # Walk up the path, one node at a time.
2871 path_so_far = None
2872 components = svn_path.split('/')
2873 for i in range(len(components)):
2874 component = components[i]
2875 this_key = this_contents = None
2876 path_so_far = _path_join(path_so_far, component)
2877 if parent_contents.has_key(component):
2878 # The component exists.
2879 this_key = parent_contents[component]
2880 if self.new_nodes.has_key(this_key):
2881 this_contents = self.new_nodes[this_key]
2882 else:
2883 # Suck the node from the nodes_db, but update the key
2884 this_contents = self.nodes_db[this_key]
2885 this_key = gen_key()
2886 self.new_nodes[this_key] = this_contents
2887 parent_contents[component] = this_key
2888 elif create:
2889 # The component does not exists, so we create it.
2890 this_contents = { }
2891 this_key = gen_key()
2892 self.new_nodes[this_key] = this_contents
2893 parent_contents[component] = this_key
2894 if i < len(components) - 1:
2895 self._invoke_delegates('mkdir', path_so_far)
2896 else:
2897 # The component does not exists and we are not instructed to
2898 # create it, so we give up.
2899 return None, None
2901 parent_key = this_key
2902 parent_contents = this_contents
2904 return this_key, this_contents
2906 def _path_exists(self, path):
2907 """If PATH exists in self.youngest of the svn repository mirror,
2908 return true, else return None.
2910 PATH must not start with '/'."""
2911 return self._open_readonly_node(path, self.youngest) is not None
2913 def _fast_delete_path(self, parent_path, parent_contents, component):
2914 """Delete COMPONENT from the parent direcory PARENT_PATH with the
2915 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
2916 in PARENT_CONTENTS."""
2917 if parent_contents.has_key(component):
2918 del parent_contents[component]
2919 self._invoke_delegates('delete_path', _path_join(parent_path, component))
2921 def _delete_path(self, svn_path, should_prune=False):
2922 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
2923 all ancestor directories that are made empty when SVN_PATH is deleted.
2924 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
2926 NOTE: This function does *not* allow you delete top-level entries
2927 (like /trunk, /branches, /tags), nor does it prune upwards beyond
2928 those entries."""
2929 pos = svn_path.rfind('/')
2930 parent_path = svn_path[:pos]
2931 entry = svn_path[pos+1:]
2932 parent_key, parent_contents = self._open_writable_node(parent_path, False)
2933 if parent_key is not None:
2934 self._fast_delete_path(parent_path, parent_contents, entry)
2935 # The following recursion makes pruning an O(n^2) operation in the
2936 # worst case (where n is the depth of SVN_PATH), but the worst case
2937 # is probably rare, and the constant cost is pretty low. Another
2938 # drawback is that we issue a delete for each path and not just
2939 # a single delete for the topmost directory pruned.
2940 if (should_prune and len(parent_contents) == 0 and
2941 parent_path.find('/') != -1):
2942 self._delete_path(parent_path, True)
2944 def _mkdir(self, path):
2945 """Create PATH in the repository mirror at the youngest revision."""
2946 self._open_writable_node(path, True)
2947 self._invoke_delegates('mkdir', path)
2949 def _change_path(self, cvs_rev):
2950 """Register a change in self.youngest for the CVS_REV's svn_path
2951 in the repository mirror."""
2952 # We do not have to update the nodes because our mirror is only
2953 # concerned with the presence or absence of paths, and a file
2954 # content change does not cause any path changes.
2955 self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, 0))
2957 def _add_path(self, cvs_rev):
2958 """Add the CVS_REV's svn_path to the repository mirror."""
2959 self._open_writable_node(cvs_rev.svn_path, True)
2960 self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, 1))
2962 def _copy_path(self, src_path, dest_path, src_revnum):
2963 """Copy SRC_PATH at subversion revision number SRC_REVNUM to
2964 DEST_PATH. In the youngest revision of the repository, DEST_PATH's
2965 parent *must* exist, but DEST_PATH *cannot* exist.
2967 Return the node key and the contents of the new node at DEST_PATH
2968 as a dictionary."""
2969 # get the contents of the node of our src_path
2970 src_key = self._open_readonly_node(src_path, src_revnum)
2971 src_contents = self._get_node(src_key)
2973 # Get the parent path and the base path of the dest_path
2974 pos = dest_path.rindex('/')
2975 dest_parent = dest_path[:pos]
2976 dest_basename = dest_path[pos+1:]
2977 dest_parent_key, dest_parent_contents = \
2978 self._open_writable_node(dest_parent, False)
2980 if dest_parent_contents.has_key(dest_basename):
2981 msg = "Attempt to add path '%s' to repository mirror " % dest_path
2982 msg = msg + "when it already exists in the mirror."
2983 raise self.SVNRepositoryMirrorPathExistsError, msg
2985 dest_parent_contents[dest_basename] = src_key
2986 self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)
2988 # Yes sir, src_key and src_contents are also the contents of the
2989 # destination. This is a cheap copy, remember! :-)
2990 return src_key, src_contents
2992 def _fill_symbolic_name(self, svn_commit):
2993 """Performs all copies necessary to create as much of the the tag
2994 or branch SVN_COMMIT.symbolic_name as possible given the current
2995 revision of the repository mirror.
2997 The symbolic name is guaranteed to exist in the Subversion
2998 repository by the end of this call, even if there are no paths
2999 under it."""
3000 symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
3001 svn_commit.symbolic_name, self.youngest)
3003 # Create the list of sources for the symbolic name. All source
3004 # prefixes must be direct sources for the destination, i.e. we
3005 # must have 'trunk' and 'branches/my_branch' and not just
3006 # 'branches'.
3007 sources = []
3008 for entry, key in symbol_fill.node_tree[symbol_fill.root_key].items():
3009 if entry == Ctx().trunk_base:
3010 sources.append(FillSource(entry, key))
3011 elif entry == Ctx().branches_base:
3012 for entry2, key2 in symbol_fill.node_tree[key].items():
3013 sources.append(FillSource(entry + '/' + entry2, key2))
3014 else:
3015 raise # Should never happen
3016 if self.tags_db.has_key(svn_commit.symbolic_name):
3017 dest_prefix = _path_join(Ctx().tags_base, svn_commit.symbolic_name)
3018 else:
3019 dest_prefix = _path_join(Ctx().branches_base,
3020 svn_commit.symbolic_name)
3022 if sources:
3023 dest_key = self._open_writable_node(dest_prefix, False)[0]
3024 self._fill(symbol_fill, dest_prefix, dest_key, sources)
3025 else:
3026 # We can only get here for a branch whose first commit is an add
3027 # (as opposed to a copy).
3028 dest_path = Ctx().branches_base + '/' + symbol_fill.name
3029 if not self._path_exists(dest_path):
3030 # If our symbol_fill was empty, that means that our first
3031 # commit on the branch was to a file added on the branch, and
3032 # that this is our first fill of that branch.
3034 # This case is covered by test 16.
3036 # ...we create the branch by copying trunk from the our
3037 # current revision number minus 1
3038 source_path = Ctx().trunk_base
3039 entries = self._copy_path(source_path, dest_path,
3040 svn_commit.revnum - 1)[1]
3041 # Now since we've just copied trunk to a branch that's
3042 # *supposed* to be empty, we delete any entries in the
3043 # copied directory.
3044 for entry in entries.keys():
3045 del_path = dest_path + '/' + entry
3046 # Delete but don't prune.
3047 self._delete_path(del_path)
3048 else:
3049 msg = "Error filling branch '" + symbol_fill.name + "'.\n"
3050 msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
3051 msg = msg + "attempted to create a branch that already exists."
3052 raise self.SVNRepositoryMirrorInvalidFillOperationError, msg
3054 def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
3055 path = None, parent_source_prefix = None,
3056 preferred_revnum = None, prune_ok = None):
3057 """Fill the tag or branch at DEST_PREFIX + PATH with items from
3058 SOURCES, and recurse into the child items.
3060 DEST_PREFIX is the prefix of the destination directory, e.g.
3061 '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
3062 FillSource classes that are candidates to be copied to the
3063 destination. DEST_KEY is the key in self.nodes_db to the
3064 destination, or None if the destination does not yet exist.
3066 PATH is the path relative to DEST_PREFIX. If PATH is None, we
3067 are at the top level, e.g. '/tags/my_tag'.
3069 PARENT_SOURCE_PREFIX is the source prefix that was used to copy
3070 the parent directory, and PREFERRED_REVNUM is an int which is the
3071 source revision number that the caller (who may have copied KEY's
3072 parent) used to perform its copy. If PREFERRED_REVNUM is None,
3073 then no revision is preferable to any other (which probably means
3074 that no copies have happened yet).
3076 PRUNE_OK means that a copy has been made in this recursion, and
3077 it's safe to prune directories that are not in
3078 SYMBOL_FILL.node_tree, provided that said directory has a source
3079 prefix of one of the PARENT_SOURCE_PREFIX.
3081 PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
3082 should only be passed in by recursive calls."""
3083 # Calculate scores and revnums for all sources
3084 for source in sources:
3085 src_revnum, score = symbol_fill.get_best_revnum(source.key,
3086 preferred_revnum)
3087 source.set_score(score, src_revnum)
3089 # Sort the sources in descending score order so that we will make
3090 # a eventual copy from the source with the highest score.
3091 sources.sort()
3092 copy_source = sources[0]
3094 src_path = _path_join(copy_source.prefix, path)
3095 dest_path = _path_join(dest_prefix, path)
3097 # Figure out if we shall copy to this destination and delete any
3098 # destination path that is in the way.
3099 do_copy = 0
3100 if dest_key is None:
3101 do_copy = 1
3102 elif prune_ok and (parent_source_prefix != copy_source.prefix or
3103 copy_source.revnum != preferred_revnum):
3104 # We are about to replace the destination, so we need to remove
3105 # it before we perform the copy.
3106 self._delete_path(dest_path)
3107 do_copy = 1
3109 if do_copy:
3110 dest_key, dest_entries = self._copy_path(src_path, dest_path,
3111 copy_source.revnum)
3112 prune_ok = 1
3113 else:
3114 dest_entries = self._get_node(dest_key)
3116 # Create the SRC_ENTRIES hash from SOURCES. The keys are path
3117 # elements and the values are lists of FillSource classes where
3118 # this path element exists.
3119 src_entries = {}
3120 for source in sources:
3121 for entry, key in symbol_fill.node_tree[source.key].items():
3122 if entry[0] == '/': # Skip flags
3123 continue
3124 if not src_entries.has_key(entry):
3125 src_entries[entry] = []
3126 src_entries[entry].append(FillSource(source.prefix, key))
3128 if prune_ok:
3129 # Delete the entries in DEST_ENTRIES that are not in src_entries.
3130 delete_list = [ ]
3131 for entry in dest_entries.keys():
3132 if not src_entries.has_key(entry):
3133 delete_list.append(entry)
3134 if delete_list:
3135 if not self.new_nodes.has_key(dest_key):
3136 dest_key, dest_entries = self._open_writable_node(dest_path, True)
3137 # Sort the delete list to get "diffable" dumpfiles.
3138 delete_list.sort()
3139 for entry in delete_list:
3140 self._fast_delete_path(dest_path, dest_entries, entry)
3142 # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
3143 src_keys = src_entries.keys()
3144 src_keys.sort()
3145 for src_key in src_keys:
3146 if dest_entries.has_key(src_key):
3147 next_dest_key = dest_entries[src_key]
3148 else:
3149 next_dest_key = None
3150 self._fill(symbol_fill, dest_prefix, next_dest_key,
3151 src_entries[src_key], _path_join(path, src_key),
3152 copy_source.prefix, sources[0].revnum, prune_ok)
3154 def _synchronize_default_branch(self, svn_commit):
3155 """Propagate any changes that happened on a non-trunk default
3156 branch to the trunk of the repository. See
3157 CVSCommit._post_commit() for details on why this is necessary."""
3158 for cvs_rev in svn_commit.cvs_revs:
3159 if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
3160 if self._path_exists(cvs_rev.svn_trunk_path):
3161 # Delete the path on trunk...
3162 self._delete_path(cvs_rev.svn_trunk_path)
3163 # ...and copy over from branch
3164 self._copy_path(cvs_rev.svn_path, cvs_rev.svn_trunk_path,
3165 svn_commit.motivating_revnum)
3166 elif cvs_rev.op == OP_DELETE:
3167 # delete trunk path
3168 self._delete_path(cvs_rev.svn_trunk_path)
3169 else:
3170 msg = ("Unknown CVSRevision operation '%s' in default branch sync."
3171 % cvs_rev.op)
3172 raise self.SVNRepositoryMirrorUnexpectedOperationError, msg
3174 def commit(self, svn_commit):
3175 """Add an SVNCommit to the SVNRepository, incrementing the
3176 Repository revision number, and changing the repository. Invoke
3177 the delegates' _start_commit() method."""
3179 if svn_commit.revnum == 2:
3180 self._initialize_repository(svn_commit.get_date())
3182 self._start_commit(svn_commit)
3184 if svn_commit.symbolic_name:
3185 Log().write(LOG_VERBOSE, "Filling symbolic name:",
3186 svn_commit.symbolic_name)
3187 self._fill_symbolic_name(svn_commit)
3188 elif svn_commit.motivating_revnum:
3189 Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
3190 % svn_commit.motivating_revnum)
3191 self._synchronize_default_branch(svn_commit)
3192 else: # This actually commits CVSRevisions
3193 if len(svn_commit.cvs_revs) > 1: plural = "s"
3194 else: plural = ""
3195 Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
3196 % (len(svn_commit.cvs_revs), plural))
3197 for cvs_rev in svn_commit.cvs_revs:
3198 # See comment in CVSCommit._commit() for what this is all
3199 # about. Note that although asking self._path_exists() is
3200 # somewhat expensive, we only do it if the first two (cheap)
3201 # tests succeed first.
3202 if not ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
3203 and (cvs_rev.rev == "1.1.1.1")
3204 and self._path_exists(cvs_rev.svn_path)):
3205 if cvs_rev.op == OP_ADD:
3206 self._add_path(cvs_rev)
3207 elif cvs_rev.op == OP_CHANGE:
3208 self._change_path(cvs_rev)
3210 if cvs_rev.op == OP_DELETE:
3211 self._delete_path(cvs_rev.svn_path, Ctx().prune)
3213 def cleanup(self):
3214 """Callback for the Cleanup.register in self.__init__."""
3215 self.revs_db = None
3216 self.nodes_db = None
3218 def add_delegate(self, delegate):
3219 """Adds DELEGATE to self.delegates.
3221 For every delegate you add, as soon as SVNRepositoryMirror
3222 performs a repository action method, SVNRepositoryMirror will call
3223 the delegate's corresponding repository action method. Multiple
3224 delegates will be called in the order that they are added. See
3225 SVNRepositoryMirrorDelegate for more information."""
3226 self.delegates.append(delegate)
3228 def _invoke_delegates(self, method, *args):
3229 """Iterate through each of our delegates, in the order that they
3230 were added, and call the delegate's method named METHOD with the
3231 arguments in ARGS."""
3232 for delegate in self.delegates:
3233 getattr(delegate, method)(*args)
def finish(self):
  """Calls the delegate finish method.

  The sequence is: end the current commit via self._end_commit(), ask
  every registered delegate to run its own finish() method, and then
  run self.cleanup()."""
  self._end_commit()
  self._invoke_delegates('finish')
  self.cleanup()
class SVNCommitItem:
  """A wrapper class for CVSRevision objects upon which
  Subversion-related data (such as properties) may be hung.

  After construction, self.c_rev is the wrapped revision and
  self.svn_props is a dictionary mapping property names to values."""

  def __init__(self, c_rev, make_svn_props):
    """Wrap C_REV and compute its Subversion properties.

    If MAKE_SVN_PROPS is true (or Ctx().cvs_revnums is set, which
    forces it on), the 'svn:' properties are filled in from C_REV's
    executable flag, keyword mode and file name."""
    ctx = Ctx()
    self.c_rev = c_rev
    self.set_cvs_revnum_properties = ctx.cvs_revnums
    self.eol_from_mime_type = ctx.eol_from_mime_type
    self.no_default_eol = ctx.no_default_eol
    self.keywords_off = ctx.keywords_off
    self.mime_mapper = ctx.mime_mapper

    # The only property that can exist before the 'svn:' ones are
    # considered is the one recording the CVS revision number.
    props = { }
    self.svn_props = props
    if self.set_cvs_revnum_properties:
      props['cvs2svn:cvs-rev'] = c_rev.rev
      make_svn_props = True

    # Nothing more to do unless the 'svn:' properties were requested.
    if not make_svn_props:
      return

    # Executable bit.
    if c_rev.file_executable:
      props['svn:executable'] = '*'

    # Keyword expansion (see issue #2): only for files whose CVS mode
    # is unset, 'kv' or 'kvl', and only if keywords were not disabled.
    if c_rev.mode in (None, 'kv', 'kvl') and not self.keywords_off:
      props['svn:keywords'] = 'author date id revision'

    # The mime type and eol style depend on each other (see also
    # issue #39), so work both out before storing either.
    mime_type = None
    eol_style = None

    if self.mime_mapper:
      mime_type = self.mime_mapper.get_type_from_filename(c_rev.cvs_path)

    if c_rev.mode == 'b':
      # Binary ('kb') file: never gets an eol style, and falls back to
      # a generic binary mime type when the mapper supplied none.
      if mime_type is None:
        mime_type = 'application/octet-stream'
    elif not self.no_default_eol:
      eol_style = 'native'
    elif (mime_type and self.eol_from_mime_type
          and mime_type.startswith("text/")):
      eol_style = 'native'

    if mime_type:
      props['svn:mime-type'] = mime_type

    if eol_style:
      props['svn:eol-style'] = eol_style
class SVNRepositoryMirrorDelegate:
  """Abstract superclass for any delegate to SVNRepositoryMirror.
  Subclasses must implement all of the methods below.

  For each method, a subclass implements, in its own way, the
  Subversion operation implied by the method's name.  For example, for
  the add_path method, the DumpfileDelegate would write out a
  "Node-add:" command to a Subversion dumpfile, the StdoutDelegate
  would merely print that the path is being added to the repository,
  and the RepositoryDelegate would actually cause the path to be added
  to the Subversion repository that it is creating.

  Every method here raises NotImplementedError.
  """

  def start_commit(self, svn_commit):
    """Begin the SVNCommit SVN_COMMIT.  What that involves is up to the
    subclass."""
    raise NotImplementedError()

  def mkdir(self, path):
    """Handle directory PATH (a string).  What that involves is up to
    the subclass."""
    raise NotImplementedError()

  def add_path(self, s_item):
    """Handle the addition described by S_ITEM (an SVNCommitItem).
    What that involves is up to the subclass."""
    raise NotImplementedError()

  def change_path(self, s_item):
    """Handle the change described by S_ITEM (an SVNCommitItem).  What
    that involves is up to the subclass."""
    raise NotImplementedError()

  def delete_path(self, path):
    """Handle the deletion of PATH (a string).  What that involves is
    up to the subclass."""
    raise NotImplementedError()

  def copy_path(self, src_path, dest_path, src_revnum):
    """Handle a copy from SRC_PATH to DEST_PATH (both strings), where
    SRC_REVNUM is a Subversion revision number (int).  What that
    involves is up to the subclass."""
    raise NotImplementedError()

  def finish(self):
    """Do whatever cleanup is needed once all revisions have been
    committed."""
    raise NotImplementedError()
3347 class DumpfileDelegate(SVNRepositoryMirrorDelegate):
3348 """Create a Subversion dumpfile."""
3350 def __init__(self, dumpfile_path=None):
3351 """Return a new DumpfileDelegate instance, attached to a dumpfile
3352 DUMPFILE_PATH (Ctx().dumpfile, if None), using Ctx().encoding.
3354 If Ctx().cvs_revnums is true, then set the 'cvs2svn:cvs-revnum'
3355 property on files, when they are changed due to a corresponding
3356 CVS revision.
3358 If Ctx().mime_mapper is not None, then it is a MimeMapper
3359 instance, used to determine whether or not to set the
3360 'svn:mime-type' property on files. But even if Ctx().mime_mapper
3361 is None, files marked with the CVS 'kb' flag will receive a mime
3362 type of "application/octet-stream".
3364 Unless Ctx().no_default_eol is true, set 'svn:eol-style' to
3365 'native' for files not marked with the CVS 'kb' flag, except as
3366 superseded by Ctx().eol_from_mime_type (see below).
3368 If Ctx().eol_from_mime_type is not None, then set 'svn:eol-style'
3369 to 'native' for all files to which Ctx().mime_mapper assigns a
3370 mime type beginning with "text/", and don't set 'svn:eol-style'
3371 for files assigned a type not beginning with "text/".
3372 """
3373 if dumpfile_path:
3374 self.dumpfile_path = dumpfile_path
3375 else:
3376 self.dumpfile_path = Ctx().dumpfile
3377 self.path_encoding = Ctx().encoding
3379 self.dumpfile = open(self.dumpfile_path, 'wb')
3380 self._write_dumpfile_header(self.dumpfile)
3382 def _write_dumpfile_header(self, dumpfile):
3383 # Initialize the dumpfile with the standard headers.
3385 # Since the CVS repository doesn't have a UUID, and the Subversion
3386 # repository will be created with one anyway, we don't specify a
3387 # UUID in the dumpflie
3388 dumpfile.write('SVN-fs-dump-format-version: 2\n\n')
3390 def _utf8_path(self, path):
3391 """Return a copy of PATH encoded in UTF-8. PATH is assumed to be
3392 encoded in self.path_encoding."""
3393 try:
3394 # Log messages can be converted with the 'replace' strategy,
3395 # but we can't afford any lossiness here.
3396 unicode_path = unicode(path, self.path_encoding, 'strict')
3397 return unicode_path.encode('utf-8')
3398 except UnicodeError:
3399 print "Unable to convert a path '%s' to internal encoding." % path
3400 print "Consider rerunning with (for example) '--encoding=latin1'"
3401 sys.exit(1)
3403 def start_commit(self, svn_commit):
3404 """Emit the start of SVN_COMMIT (an SVNCommit)."""
3406 self.revision = svn_commit.revnum
3408 # The start of a new commit typically looks like this:
3410 # Revision-number: 1
3411 # Prop-content-length: 129
3412 # Content-length: 129
3414 # K 7
3415 # svn:log
3416 # V 27
3417 # Log message for revision 1.
3418 # K 10
3419 # svn:author
3420 # V 7
3421 # jrandom
3422 # K 8
3423 # svn:date
3424 # V 27
3425 # 2003-04-22T22:57:58.132837Z
3426 # PROPS-END
3428 # Notice that the length headers count everything -- not just the
3429 # length of the data but also the lengths of the lengths, including
3430 # the 'K ' or 'V ' prefixes.
3432 # The reason there are both Prop-content-length and Content-length
3433 # is that the former includes just props, while the latter includes
3434 # everything. That's the generic header form for any entity in a
3435 # dumpfile. But since revisions only have props, the two lengths
3436 # are always the same for revisions.
3438 # Calculate the total length of the props section.
3439 props = svn_commit.get_revprops()
3440 prop_names = props.keys()
3441 prop_names.sort()
3442 total_len = 10 # len('PROPS-END\n')
3443 for propname in prop_names:
3444 if props[propname] is None:
3445 continue
3446 klen = len(propname)
3447 klen_len = len('K %d' % klen)
3448 vlen = len(props[propname])
3449 vlen_len = len('V %d' % vlen)
3450 # + 4 for the four newlines within a given property's section
3451 total_len = total_len + klen + klen_len + vlen + vlen_len + 4
3453 # Print the revision header and props
3454 self.dumpfile.write('Revision-number: %d\n'
3455 'Prop-content-length: %d\n'
3456 'Content-length: %d\n'
3457 '\n'
3458 % (self.revision, total_len, total_len))
3460 for propname in prop_names:
3461 if props[propname] is None:
3462 continue
3463 self.dumpfile.write('K %d\n'
3464 '%s\n'
3465 'V %d\n'
3466 '%s\n' % (len(propname),
3467 propname,
3468 len(props[propname]),
3469 props[propname]))
3471 self.dumpfile.write('PROPS-END\n')
3472 self.dumpfile.write('\n')
3474 def mkdir(self, path):
3475 """Emit the creation of directory PATH."""
3476 self.dumpfile.write("Node-path: %s\n"
3477 "Node-kind: dir\n"
3478 "Node-action: add\n"
3479 "Content-length: 10\n"
3480 "\n"
3481 "\n" % self._utf8_path(path))
3483 def _add_or_change_path(self, s_item, op):
3484 """Emit the addition or change corresponding to S_ITEM.
3485 OP is either the constant OP_ADD or OP_CHANGE."""
3487 # Validation stuffs
3488 if op == OP_ADD:
3489 action = 'add'
3490 elif op == OP_CHANGE:
3491 action = 'change'
3492 else:
3493 sys.stderr.write("%s: _add_or_change_path() called with bad op ('%s')"
3494 % (error_prefix, op))
3495 sys.exit(1)
3497 # Convenience variables
3498 c_rev = s_item.c_rev
3499 svn_props = s_item.svn_props
3501 # The property handling here takes advantage of an undocumented
3502 # but IMHO consistent feature of the Subversion dumpfile-loading
3503 # code. When a node's properties aren't mentioned (that is, the
3504 # "Prop-content-length:" header is absent, no properties are
3505 # listed at all, and there is no "PROPS-END\n" line) then no
3506 # change is made to the node's properties.
3508 # This is consistent with the way dumpfiles behave w.r.t. text
3509 # content changes, so I'm comfortable relying on it. If you
3510 # commit a change to *just* the properties of some node that
3511 # already has text contents from a previous revision, then in the
3512 # dumpfile output for the prop change, no "Text-content-length:"
3513 # nor "Text-content-md5:" header will be present, and the text of
3514 # the file will not be given. But this does not cause the file's
3515 # text to be erased! It simply remains unchanged.
3517 # This works out great for cvs2svn, due to lucky coincidences:
3519 # For files, the only properties we ever set are set in the first
3520 # revision; all other revisions (including on branches) inherit
3521 # from that. After the first revision, we never change file
3522 # properties, therefore, there is no need to remember the full set
3523 # of properties on a given file once we've set it.
3525 # For directories, the only property we set is "svn:ignore", and
3526 # while we may change it after the first revision, we always do so
3527 # based on the contents of a ".cvsignore" file -- in other words,
3528 # CVS is doing the remembering for us, so we still don't have to
3529 # preserve the previous value of the property ourselves.
3531 # Calculate the (sorted-by-name) property string and length, if any.
3532 prop_contents = ''
3533 prop_names = svn_props.keys()
3534 prop_names.sort()
3535 for pname in prop_names:
3536 pval = svn_props[pname]
3537 prop_contents = prop_contents + \
3538 'K %d\n%s\nV %d\n%s\n' \
3539 % (len(pname), pname, len(pval), pval)
3540 if prop_contents:
3541 prop_contents = prop_contents + 'PROPS-END\n'
3542 props_len = len(prop_contents)
3543 else:
3544 props_len = 0
3546 props_header = ''
3547 if props_len:
3548 props_header = 'Prop-content-length: %d\n' % props_len
3550 # treat .cvsignore as a directory property
3551 dir_path, basename = os.path.split(c_rev.svn_path)
3552 if basename == ".cvsignore":
3553 ignore_vals = generate_ignores(c_rev)
3554 ignore_contents = '\n'.join(ignore_vals)
3555 ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
3556 (len(ignore_contents), ignore_contents))
3557 ignore_contents = ignore_contents + 'PROPS-END\n'
3558 ignore_len = len(ignore_contents)
3560 # write headers, then props
3561 self.dumpfile.write('Node-path: %s\n'
3562 'Node-kind: dir\n'
3563 'Node-action: change\n'
3564 'Prop-content-length: %d\n'
3565 'Content-length: %d\n'
3566 '\n'
3567 '%s'
3568 % (self._utf8_path(dir_path), ignore_len,
3569 ignore_len, ignore_contents))
3571 pipe_cmd, pipe = get_co_pipe(c_rev)
3572 self.dumpfile.write('Node-path: %s\n'
3573 'Node-kind: file\n'
3574 'Node-action: %s\n'
3575 '%s' # no property header if no props
3576 'Text-content-length: '
3577 % (self._utf8_path(c_rev.svn_path),
3578 action, props_header))
3580 pos = self.dumpfile.tell()
3582 self.dumpfile.write('0000000000000000\n'
3583 'Text-content-md5: 00000000000000000000000000000000\n'
3584 'Content-length: 0000000000000000\n'
3585 '\n')
3587 if prop_contents:
3588 self.dumpfile.write(prop_contents)
3590 # Insert the rev contents, calculating length and checksum as we go.
3591 checksum = md5.new()
3592 length = 0
3593 normalize_crlf = sys.platform == "win32" and c_rev.mode != "b"
3594 trailing_cr = ""
3595 buf = pipe.fromchild.read(PIPE_READ_SIZE)
3596 while buf:
3597 if normalize_crlf:
3598 buf = string.replace(buf,"\r\n","\n")
3599 if buf[-1] == "\r":
3600 trailing_cr = "\r"
3601 buf = buf[:-1]
3602 else:
3603 trailing_cr = ""
3604 checksum.update(buf)
3605 length = length + len(buf)
3606 self.dumpfile.write(buf)
3607 # optimize because of python's immutable strings
3608 if trailing_cr:
3609 buf = trailing_cr + pipe.fromchild.read(PIPE_READ_SIZE)
3610 else:
3611 buf = pipe.fromchild.read(PIPE_READ_SIZE)
3612 pipe.fromchild.close()
3613 error_output = pipe.childerr.read()
3614 exit_status = pipe.wait()
3615 if exit_status:
3616 sys.exit("%s: The command '%s' failed with exit status: %s\n"
3617 "and the following output:\n"
3618 "%s" % (error_prefix, pipe_cmd, exit_status, error_output))
3620 # Go back to patch up the length and checksum headers:
3621 self.dumpfile.seek(pos, 0)
3622 # We left 16 zeros for the text length; replace them with the real
3623 # length, padded on the left with spaces:
3624 self.dumpfile.write('%16d' % length)
3625 # 16... + 1 newline + len('Text-content-md5: ') == 35
3626 self.dumpfile.seek(pos + 35, 0)
3627 self.dumpfile.write(checksum.hexdigest())
3628 # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
3629 self.dumpfile.seek(pos + 84, 0)
3630 # The content length is the length of property data, text data,
3631 # and any metadata around/inside around them.
3632 self.dumpfile.write('%16d' % (length + props_len))
3633 # Jump back to the end of the stream
3634 self.dumpfile.seek(0, 2)
3636 # This record is done (write two newlines -- one to terminate
3637 # contents that weren't themselves newline-termination, one to
3638 # provide a blank line for readability.
3639 self.dumpfile.write('\n\n')
3641 def add_path(self, s_item):
3642 """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
3643 self._add_or_change_path(s_item, OP_ADD)
3645 def change_path(self, s_item):
3646 """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
3647 self._add_or_change_path(s_item, OP_CHANGE)
3649 def delete_path(self, path):
3650 """Emit the deletion of PATH."""
3651 self.dumpfile.write('Node-path: %s\n'
3652 'Node-action: delete\n'
3653 '\n' % self._utf8_path(path))
3655 def copy_path(self, src_path, dest_path, src_revnum):
3656 """Emit the copying of SRC_PATH at SRC_REV to DEST_PATH."""
3657 # We don't need to include "Node-kind:" for copies; the loader
3658 # ignores it anyway and just uses the source kind instead.
3659 self.dumpfile.write('Node-path: %s\n'
3660 'Node-action: add\n'
3661 'Node-copyfrom-rev: %d\n'
3662 'Node-copyfrom-path: /%s\n'
3663 '\n'
3664 % (self._utf8_path(dest_path),
3665 src_revnum,
3666 self._utf8_path(src_path)))
3668 def finish(self):
3669 """Perform any cleanup necessary after all revisions have been
3670 committed."""
3671 self.dumpfile.close()
class RepositoryDelegate(DumpfileDelegate):
  """Creates a new Subversion Repository.  DumpfileDelegate does all
  of the heavy lifting."""
  def __init__(self):
    """Create the target repository (unless Ctx().existing_svnrepos is
    set), open a temporary dumpfile, and start an 'svnadmin load'
    process that the dumpfile contents are later fed to."""
    self.svnadmin = Ctx().svnadmin
    self.target = Ctx().target
    if not Ctx().existing_svnrepos:
      Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
      # We always pass the --bdb-txn-nosync switch to svnadmin here
      # because it gives us a 4-5x speed boost (If cvs2svn is creating
      # the repository, cvs2svn should be the only program accessing
      # the svn repository (until cvs is done, at least)).  However,
      # for the sake of caution, we'll turn no-sync off in self.finish
      # unless the user passed --bdb-txn-nosync to cvs2svn.
      run_command('%s create %s %s' % (self.svnadmin, "--bdb-txn-nosync",
                                       self.target))

    # Since the output of this run is a repository, not a dumpfile,
    # the temporary dumpfiles we create should go in the tmpdir.
    DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

    # This is 1 if a commit is in progress, otherwise None.
    self._commit_in_progress = None

    # The base class opened the dumpfile write-only and wrote the
    # header into it.  Close that handle explicitly before reopening
    # the file for reading and writing; otherwise its buffered header
    # is only flushed whenever the old file object happens to be
    # finalized, which can be after the truncating reopen below.
    self.dumpfile.close()
    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = Popen3('%s load -q %s' % (self.svnadmin, self.target),
                              True)
    self.loader_pipe.fromchild.close()
    try:
      self._write_dumpfile_header(self.loader_pipe.tochild)
    except IOError:
      sys.stderr.write("%s: svnadmin failed with the following output while "
                       "loading the dumpfile:\n" % (error_prefix))
      sys.stderr.write(self.loader_pipe.childerr.read())
      sys.exit(1)

  def _feed_pipe(self):
    """Feed the revision stored in the dumpfile to the svnadmin
    load pipe."""
    self.dumpfile.seek(0)
    while 1:
      data = self.dumpfile.read(128*1024)  # Chunk size is arbitrary
      if not len(data):
        break
      try:
        self.loader_pipe.tochild.write(data)
      except IOError:
        sys.stderr.write("%s: svnadmin failed with the following output while "
                         "loading the dumpfile:\n" % (error_prefix))
        sys.stderr.write(self.loader_pipe.childerr.read())
        sys.exit(1)

  def start_commit(self, svn_commit):
    """Start a new commit.  If a commit is already in progress, feed
    the dumpfile's contents to the svnadmin load pipe and empty the
    dumpfile, then write the new revision's header into it."""
    if self._commit_in_progress:
      self._feed_pipe()
    self.dumpfile.seek(0)
    self.dumpfile.truncate()
    DumpfileDelegate.start_commit(self, svn_commit)
    self._commit_in_progress = 1

  def finish(self):
    """Loads the last commit into the repository.

    Exits the program if 'svnadmin load' reports a non-zero exit
    status.  Afterwards removes the temporary dumpfile and, where
    applicable, comments out the DB_TXN_NOSYNC flag in the new
    repository's DB_CONFIG file."""
    self._feed_pipe()
    self.dumpfile.close()
    self.loader_pipe.tochild.close()
    error_output = self.loader_pipe.childerr.read()
    exit_status = self.loader_pipe.wait()
    if exit_status:
      sys.exit('%s: svnadmin load failed with exit status: %s\n'
               'and the following output:\n'
               '%s' % (error_prefix, exit_status, error_output))
    os.remove(self.dumpfile_path)

    # If we created the repository and --bdb-no-sync wasn't passed,
    # then comment out the DB_TXN_NOSYNC line in the DB_CONFIG file
    if (not Ctx().existing_svnrepos) and (not Ctx().bdb_txn_nosync):
      db_config = os.path.join(self.target, "db/DB_CONFIG")
      no_sync = 'set_flags DB_TXN_NOSYNC\n'

      config_file = open(db_config, 'r')
      try:
        contents = config_file.readlines()
      finally:
        config_file.close()

      # Only rewrite the file if the flag line is actually present; if
      # it is absent there is nothing to switch off, and that should
      # not abort an otherwise finished conversion.
      if no_sync in contents:
        contents[contents.index(no_sync)] = '# ' + no_sync
        config_file = open(db_config, 'w')
        try:
          config_file.writelines(contents)
        finally:
          config_file.close()
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """A delegate that changes nothing on disk: every repository action
  is merely reported through Log().  The messages say that something
  is being done although all that is really being done is the
  reporting itself."""
  def __init__(self, total_revs):
    """TOTAL_REVS is the number shown after the slash in the
    'Starting Subversion commit' progress message."""
    self.total_revs = total_revs

  def start_commit(self, svn_commit):
    """Report the Subversion revision number of the commit that is
    about to start, preceded (at verbose level) by a separator line."""
    Log().write(LOG_VERBOSE, "=" * 60)
    progress = (svn_commit.revnum, self.total_revs)
    Log().write(LOG_NORMAL, "Starting Subversion commit %d / %d" % progress)

  def mkdir(self, path):
    """Report that directory PATH is being created."""
    Log().write(LOG_VERBOSE, " New Directory", path)

  def add_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being added."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Adding", svn_path)

  def change_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being changed."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Changing", svn_path)

  def delete_path(self, path):
    """Report that PATH is being deleted."""
    Log().write(LOG_VERBOSE, " Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Report, on two lines, that revision SRC_REVNUM of SRC_PATH is
    being copied to DEST_PATH."""
    Log().write(LOG_VERBOSE, " Copying revision", src_revnum, "of", src_path)
    Log().write(LOG_VERBOSE, " to", dest_path)

  def finish(self):
    """Report that the repository has been created."""
    Log().write(LOG_VERBOSE, "Finished creating Subversion repository.")
    Log().write(LOG_QUIET, "Done.")
# This should be a local to pass1,
# but Python 2.0 does not support nested scopes.
OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
def pass1():
  """Pass 1: walk Ctx().cvsroot and feed every ',v' file found to the
  RCS parser, with a CollectData instance as the sink.

  Problems with individual files are collected rather than aborting
  the walk; once the walk is over the program exits with a summary if
  any were recorded, or if no ',v' file was found at all."""
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    # BATON is the CollectData instance; it is passed in explicitly
    # because this function cannot see pass1's locals (see the note
    # about nested scopes above).
    cd = baton
    for fname in files:
      if fname[-2:] != ',v':
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname[-6:] == OS_SEP_PLUS_ATTIC:
        # drop the 'Attic' portion from the pathname for the canonical name.
        cd.set_fname(os.path.join(dirname[:-6], fname), pathname)
      else:
        # If this file also exists in the attic, it's a fatal error
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        cd.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)
      try:
        rcs_file = open(pathname, 'rb')
        # Close the file as soon as parsing is over, whether or not it
        # succeeded, instead of leaving one open handle per ',v' file
        # for the garbage collector to deal with.
        try:
          cvs2svn_rcsparse.parse(rcs_file, cd)
        finally:
          rcs_file.close()
      except (cvs2svn_rcsparse.common.RCSParseError, ValueError, RuntimeError):
        err = "%s: '%s' is not a valid ,v file" \
              % (error_prefix, pathname)
        sys.stderr.write(err + '\n')
        cd.fatal_errors.append(err)
      except:
        # Anything else is unexpected: say which file was being parsed
        # and let the exception propagate.
        Log().write(LOG_WARN, "Exception occurred while parsing %s" % pathname)
        raise

  os.path.walk(Ctx().cvsroot, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if len(cd.fatal_errors) > 0:
    sys.exit("Pass 1 complete.\n" + "=" * 75 + "\n"
             + "Error summary:\n"
             + "\n".join(cd.fatal_errors)
             + "\nExited due to fatal error(s).")

  if cd.found_valid_file is None:
    sys.exit("\nNo RCS files found in your CVS Repository!\n"
             + "Are you absolutely certain you are pointing cvs2svn\n"
             + "at a CVS repository?\n"
             + "\nExited due to fatal error(s).")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass2():
  """Pass 2: clean up the revision information.

  First validate the exclusion and force-tag/force-branch options
  against the symbol database, exiting with status 1 if any check
  fails.  Then create the tags database and copy the revisions file
  to the clean-revisions file, dropping revisions on excluded
  branches, rewriting each revision's tag and branch lists, and
  adjusting timestamps according to the resync data."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Convert the list of regexps to a list of strings
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write(" '%s'\n" % (blocker))
      sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write(" '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = filter(is_not_forced, mismatches)
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write(" '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags.keys():
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp.  We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory.  really large repositories with whacky timestamps could
    ### bust this assumption.  should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit.  We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped.  We also make them a list because
    # we will dynamically expand the lower/upper bound as we find commits
    # that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      if resync.has_key(digest):
        resync[digest].append([t1_l, t1_u, t2])
      else:
        resync[digest] = [ [t1_l, t1_u, t2] ]

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    digests = resync.keys()
    for digest in digests:
      (resync[digest]).sort()

    return resync

  resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))

  output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)

  # process the revisions file, looking for items to clean up
  for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
    c_rev = CVSRevision(Ctx(), line[:-1])

    # Skip this entire revision if it's on an excluded branch
    if excludes.has_key(c_rev.branch_name):
      continue

    # Remove all references to excluded tags and branches
    def not_excluded(symbol, excludes=excludes):
      return not excludes.has_key(symbol)
    c_rev.branches = filter(not_excluded, c_rev.branches)
    c_rev.tags = filter(not_excluded, c_rev.tags)

    # Convert all branches that are forced to be tags
    for forced_tag in Ctx().forced_tags:
      if forced_tag in c_rev.branches:
        c_rev.branches.remove(forced_tag)
        c_rev.tags.append(forced_tag)

    # Convert all tags that are forced to be branches
    for forced_branch in Ctx().forced_branches:
      if forced_branch in c_rev.tags:
        c_rev.tags.remove(forced_branch)
        c_rev.branches.append(forced_branch)

    # see if this is "near" any of the resync records we
    # have recorded for this digest [of the log message].
    for record in resync.get(c_rev.digest, []):
      if record[0] <= c_rev.timestamp <= record[1]:
        # bingo!  remap the time on this (record[2] is the new time).

        # adjust the time range.  we want the COMMIT_THRESHOLD from the
        # bounds of the earlier/latest commit in this group.
        record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
        record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

        # By default this will be the new timestamp
        new_timestamp = record[2]
        # If the new timestamp is earlier than that of our previous revision
        if record[2] < c_rev.prev_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is before previous the time of"
                  + " revision %s (%s):")
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, record[2],
                                        c_rev.prev_rev, c_rev.prev_timestamp))
          # If resyncing our rev to c_rev.prev_timestamp + 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted sync time, then sync back to c_rev.prev_timestamp
          # + 1...
          if (c_rev.prev_timestamp - record[2]) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.prev_timestamp + 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          # ...otherwise, make no change
          else:
            new_timestamp = c_rev.timestamp
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)

        msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
                 record[2] - c_rev.timestamp)
        Log().write(LOG_VERBOSE, msg)

        c_rev.timestamp = new_timestamp

        # stop looking for hits
        break

    output.write(str(c_rev) + "\n")

  # The file is registered above as input to pass3.  Close it explicitly
  # so that everything written is on disk before this function returns,
  # rather than relying on the file object being finalized in time.
  output.close()
  Log().write(LOG_QUIET, "Done")
def pass3():
  """Pass 3: run sort_file() on the clean-revisions file, producing
  the sorted-revisions file, and register the latter with Cleanup()
  against pass5."""
  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  unsorted_revs = temp(DATAFILE + CLEAN_REVS_SUFFIX)
  sorted_revs = temp(DATAFILE + SORTED_REVS_SUFFIX)
  sort_file(unsorted_revs, sorted_revs)
  cleanup = Cleanup()
  cleanup.register(temp(DATAFILE + SORTED_REVS_SUFFIX), pass5)
  Log().write(LOG_QUIET, "Done")
def pass4():
  """Pass 4: read the sorted revisions file and log every revision in
  a new CVSRevisionDatabase.

  Unless this is a trunk-only conversion, every revision is also
  logged in a new LastSymbolicNameDatabase, which contains the last
  CVSRevision that is a source for each tag or branch.
  """
  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
  if Ctx().trunk_only:
    # A do-nothing stand-in, so that the loop below need not test
    # Ctx().trunk_only for every single revision.
    class _NullLastSymbolicNameDB:
      def log_revision(*args):
        pass
      def create_database(*args):
        pass
    last_sym_name_db = _NullLastSymbolicNameDB()
  else:
    Log().write(LOG_QUIET,
                "and finding last CVS revisions for all symbolic names...")
    last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for line in sorted_revs:
    # line[:-1] drops the last character of the line (its newline).
    c_rev = CVSRevision(Ctx(), line[:-1])
    cvs_revs_db.log_revision(c_rev)
    last_sym_name_db.log_revision(c_rev)
    StatsKeeper().record_c_rev(c_rev)

  last_sym_name_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  Every sorted CVSRevision is fed to a CVSRevisionAggregator; in a
  trunk-only conversion, revisions that carry a branch name are left
  out.  CVSCommit._commit also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details."""
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for rev_line in sorted_revs:
    c_rev = CVSRevision(Ctx(), rev_line[:-1])
    if Ctx().trunk_only and c_rev.branch_name is not None:
      # Branch revision in a trunk-only run: nothing to aggregate.
      continue
    aggregator.process_revision(c_rev)
  aggregator.flush()

  # SVNCommit.revnum is one past the number of commits recorded here.
  StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass6():
  """Sort the symbol openings/closings file (skipped when trunk-only).

  The sorted file is registered with Cleanup() against pass8."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  if not Ctx().trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(sorted_path, pass8)

  Log().write(LOG_QUIET, "Done")
def pass7():
  """Record where each symbolic name's entries begin in the sorted
  openings/closings file.  Nothing is generated for a trunk-only
  conversion."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      while 1:
        # Take the offset *before* reading, so that it points at the
        # start of the line about to be read.
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only the symbol name is used; the full unpack is kept so that a
        # malformed line still raises ValueError, as it always did.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      # Release the file handle even if a line fails to parse.
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Replay every stored SVNCommit, in revision order, into an
  SVNRepositoryMirror and whichever output delegates are configured."""
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  # Choose the output delegate: a repository when a target was given,
  # a dumpfile otherwise.
  if Ctx().target:
    delegate_class = RepositoryDelegate
    start_message = "Starting Subversion Repository."
  else:
    delegate_class = DumpfileDelegate
    start_message = "Starting Subversion Dumpfile."

  # A dry run announces the start but attaches no output delegate.
  if not Ctx().dry_run:
    repos.add_delegate(delegate_class())
  Log().write(LOG_QUIET, start_message)

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  svncounter = 2 # Repository initialization is 1.
  svn_commit = persistence_manager.get_svn_commit(svncounter)
  while svn_commit:
    repos.commit(svn_commit)
    svncounter += 1
    svn_commit = persistence_manager.get_svn_commit(svncounter)

  repos.finish()
# The conversion passes, in execution order.  Pass number N (1-based, as
# given on the command line) is stored at index N - 1.
_passes = [
  pass1,
  pass2,
  pass3,
  pass4,
  pass5,
  pass6,
  pass7,
  pass8,
  ]
class Ctx:
  """Session state for this run of cvs2svn.  For example, run-time
  options are stored here.  This class is a Borg (every instance shares
  one attribute dictionary), see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if not self.__dict__:
      # First instantiation: the shared state is still empty, so fill in
      # the defaults.

      # Input and output locations.
      self.cvsroot = None
      self.target = None
      self.dumpfile = DUMPFILE
      self.tmpdir = '.'

      # Verbosity.
      self.verbose = 0
      self.quiet = 0

      # Conversion mode switches.
      self.prune = 1
      self.existing_svnrepos = 0
      self.dump_only = 0
      self.dry_run = 0
      self.trunk_only = 0

      # Repository layout.
      self.trunk_base = "trunk"
      self.tags_base = "tags"
      self.branches_base = "branches"

      # Log message encoding and file property handling.
      self.encoding = "ascii"
      self.mime_types_file = None
      self.mime_mapper = None
      self.no_default_eol = 0
      self.eol_from_mime_type = 0
      self.keywords_off = 0

      # External tools and miscellaneous options.
      self.use_cvs = None
      self.svnadmin = "svnadmin"
      self.username = None
      self.print_help = 0
      self.skip_cleanup = 0
      self.cvs_revnums = 0
      self.bdb_txn_nosync = 0

      # Symbol handling.
      self.forced_branches = []
      self.forced_tags = []
      self.excludes = []
      self.symbol_transforms = []
class MimeMapper:
  """A class that provides mappings from file names to MIME types.
  Note that we should really be using Python's 'mimetypes' module.
  See http://cvs2svn.tigris.org/servlets/ReadMsg?list=dev&msgNo=266
  for more."""

  def __init__(self):
    # Maps a file extension (without the dot), or a whole base name for
    # extensionless files, to a MIME type string.
    self.mappings = { }

  def set_mime_types_file(self, mime_types_file):
    """Load the mappings found in MIME_TYPES_FILE into self.mappings.

    Lines starting with '#' and lines with fewer than two fields are
    ignored.  When an extension is listed under two different types, a
    warning is written to stderr and the later type wins."""
    # A private FileInput instance is used instead of fileinput.input(),
    # which relies on module-global state and raises RuntimeError if
    # another global input stream is active.
    reader = fileinput.FileInput(mime_types_file)
    try:
      for line in reader:
        if line.startswith("#"):
          continue

        # format of a line is something like
        # text/plain c h cpp
        fields = line.split()
        if len(fields) < 2:
          continue
        mime_type = fields[0]
        for ext in fields[1:]:
          known_type = self.mappings.get(ext)
          if known_type is not None and known_type != mime_type:
            sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n" \
                             % (warning_prefix, ext, known_type, mime_type))
          self.mappings[ext] = mime_type
    finally:
      # Release the file handle even if reading fails part-way through.
      reader.close()

  def get_type_from_filename(self, filename):
    """Return the MIME type mapped to FILENAME, or None if there is none."""
    basename, extension = os.path.splitext(os.path.basename(filename))

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename
    return self.mappings.get(extension)
4282 def convert(start_pass, end_pass):
4283 "Convert a CVS repository to an SVN repository."
4285 cleanup = Cleanup()
4286 times = [ None ] * (end_pass + 1)
4287 times[start_pass - 1] = time.time()
4288 StatsKeeper().set_start_time(time.time())
4289 for i in range(start_pass - 1, end_pass):
4290 Log().write(LOG_QUIET, '----- pass %d -----' % (i + 1))
4291 _passes[i]()
4292 times[i + 1] = time.time()
4293 StatsKeeper().log_duration_for_pass(times[i + 1] - times[i], i + 1)
4294 # Dispose of items in Ctx() not intended to live past the end of the pass
4295 # (Identified by exactly one leading underscore)
4296 for attr in dir(Ctx()):
4297 if (len(attr) > 2 and attr[0] == '_' and attr[1] != '_'
4298 and not attr[:6] == "_Ctx__"):
4299 delattr(Ctx(), attr)
4300 if not Ctx().skip_cleanup:
4301 cleanup.cleanup(_passes[i])
4302 StatsKeeper().set_end_time(time.time())
4304 Log().write(LOG_QUIET, StatsKeeper())
4305 if end_pass < 4:
4306 Log().write(LOG_QUIET, '(These are unaltered CVS repository stats and do not\n'
4307 + ' reflect tags or branches excluded via --exclude)\n')
4308 print StatsKeeper().timings()
4311 def usage():
4312 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
4313 % os.path.basename(sys.argv[0])
4314 print ' --help, -h print this usage message and exit with success'
4315 print ' --version print the version number'
4316 print ' -q quiet'
4317 print ' -v verbose'
4318 print ' -s PATH path for SVN repos'
4319 print ' -p START[:END] start at pass START, end at pass END of %d' % len(_passes)
4320 print ' If only START is given, run only pass START'
4321 print ' (implicitly enables --skip-cleanup)'
4322 print ' --existing-svnrepos load into existing SVN repository'
4323 print ' --dumpfile=PATH name of intermediate svn dumpfile'
4324 print ' --tmpdir=PATH directory to use for tmp data (default to cwd)'
4325 print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
4326 print ' --dry-run do not create a repository or a dumpfile;'
4327 print ' just print what would happen.'
4328 print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
4329 print ' (only use this if having problems with RCS)'
4330 print ' --svnadmin=PATH path to the svnadmin program'
4331 print ' --trunk-only convert only trunk commits, not tags nor branches'
4332 print ' --trunk=PATH path for trunk (default: %s)' \
4333 % Ctx().trunk_base
4334 print ' --branches=PATH path for branches (default: %s)' \
4335 % Ctx().branches_base
4336 print ' --tags=PATH path for tags (default: %s)' \
4337 % Ctx().tags_base
4338 print ' --no-prune don\'t prune empty directories'
4339 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
4340 print ' --encoding=ENC encoding of log messages in CVS repos (default: %s)' \
4341 % Ctx().encoding
4342 print ' --force-branch=NAME force NAME to be a branch'
4343 print ' --force-tag=NAME force NAME to be a tag'
4344 print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
4345 print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
4346 print ' use Python regexp and reference syntax respectively'
4347 print ' --username=NAME username for cvs2svn-synthesized commits'
4348 print ' --skip-cleanup prevent the deletion of intermediate files'
4349 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
4350 print ' --cvs-revnums record CVS revision numbers as file properties'
4351 print ' --mime-types=FILE specify an apache-style mime.types file for\n' \
4352 ' setting svn:mime-type'
4353 print ' --eol-from-mime-type set svn:eol-style by mime type (only with --mime-types)'
4354 print ' --no-default-eol don\'t set svn:eol-style by CVS defaults'
4355 print ' --keywords-off don\'t set svn:keywords on any files (cvs2svn sets'
4356 print ' "svn:keywords to author date id" on non-binary files'
4357 print ' by default)'
def main():
  """Command-line entry point.

  Parses sys.argv into the shared Ctx(), checks the options for
  consistency, makes sure the temporary directory exists, takes the
  'cvs2svn.lock' directory lock inside it and runs the requested range
  of passes via convert() (optionally under the hotshot profiler).  The
  lock directory is removed again when the conversion ends.  Usage and
  consistency errors are reported on stderr (stdout for bad -p values)
  and end the process through sys.exit()."""
  # Convenience var, so we don't have to keep instantiating this Borg.
  ctx = Ctx()

  profiling = None
  # By default every pass is run.
  start_pass = 1
  end_pass = len(_passes)

  try:
    opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
                               [ "help", "create", "trunk=",
                                 "username=", "existing-svnrepos",
                                 "branches=", "tags=", "encoding=",
                                 "force-branch=", "force-tag=", "exclude=",
                                 "use-cvs", "mime-types=",
                                 "eol-from-mime-type", "no-default-eol",
                                 "trunk-only", "no-prune", "dry-run",
                                 "dump-only", "dumpfile=", "tmpdir=",
                                 "svnadmin=", "skip-cleanup", "cvs-revnums",
                                 "bdb-txn-nosync", "version", "profile",
                                 "keywords-off", "symbol-transform="])
  except getopt.GetoptError, e:
    sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
    usage()
    sys.exit(1)

  # Transfer each recognized option into ctx (or into a local variable).
  for opt, value in opts:
    if opt == '--version':
      print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
      sys.exit(0)
    elif opt == '-p':
      # Don't cleanup if we're doing incrementals.
      ctx.skip_cleanup = 1
      # "START:END" selects a range; a bare "START" runs just that pass.
      # NOTE(review): a non-numeric value, or one starting with ':', makes
      # int() raise ValueError, which is not caught here.
      if value.find(':') > 0:
        start_pass, end_pass = map(int, value.split(':'))
      else:
        end_pass = start_pass = int(value)
      if start_pass > len(_passes) or start_pass < 1:
        print '%s: illegal value (%d) for starting pass. '\
              'must be 1 through %d.' % (error_prefix, int(start_pass),
                                         len(_passes))
        sys.exit(1)
      if end_pass < start_pass or end_pass > len(_passes):
        print '%s: illegal value (%d) for ending pass. ' \
              'must be %d through %d.' % (error_prefix, int(end_pass),
                                          int(start_pass), len(_passes))
        sys.exit(1)
    elif (opt == '--help') or (opt == '-h'):
      # Only remember the request; usage() is printed after the loop.
      ctx.print_help = 1
    elif opt == '-v':
      Log().log_level = LOG_VERBOSE
      ctx.verbose = 1
    elif opt == '-q':
      Log().log_level = LOG_QUIET
      ctx.quiet = 1
    elif opt == '-s':
      ctx.target = value
    elif opt == '--existing-svnrepos':
      ctx.existing_svnrepos = 1
    elif opt == '--dumpfile':
      ctx.dumpfile = value
    elif opt == '--tmpdir':
      ctx.tmpdir = value
    elif opt == '--use-cvs':
      ctx.use_cvs = 1
    elif opt == '--svnadmin':
      ctx.svnadmin = value
    elif opt == '--trunk-only':
      ctx.trunk_only = 1
    elif opt == '--trunk':
      if not value:
        sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
      ctx.trunk_base = value
    elif opt == '--branches':
      if not value:
        sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
      ctx.branches_base = value
    elif opt == '--tags':
      if not value:
        sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
      ctx.tags_base = value
    elif opt == '--no-prune':
      ctx.prune = None
    elif opt == '--dump-only':
      ctx.dump_only = 1
    elif opt == '--dry-run':
      ctx.dry_run = 1
    elif opt == '--encoding':
      ctx.encoding = value
    elif opt == '--force-branch':
      ctx.forced_branches.append(value)
    elif opt == '--force-tag':
      ctx.forced_tags.append(value)
    elif opt == '--exclude':
      # The pattern is anchored at both ends, so it has to match a whole
      # symbol name.
      try:
        ctx.excludes.append(re.compile('^' + value + '$'))
      except re.error, e:
        sys.exit(error_prefix + ": '%s' is not a valid regexp.\n" % (value))
    elif opt == '--mime-types':
      ctx.mime_types_file = value
    elif opt == '--eol-from-mime-type':
      ctx.eol_from_mime_type = 1
    elif opt == '--no-default-eol':
      ctx.no_default_eol = 1
    elif opt == '--keywords-off':
      ctx.keywords_off = 1
    elif opt == '--username':
      ctx.username = value
    elif opt == '--skip-cleanup':
      ctx.skip_cleanup = 1
    elif opt == '--cvs-revnums':
      ctx.cvs_revnums = 1
    elif opt == '--bdb-txn-nosync':
      ctx.bdb_txn_nosync = 1
    elif opt == '--create':
      sys.stderr.write(warning_prefix +
          ': The behaviour produced by the --create option is now the '
          'default,\nand passing the option is deprecated.\n')
    elif opt == '--profile':
      profiling = 1
    elif opt == '--symbol-transform':
      # NOTE(review): the value is split on every ':', so it is stored as
      # a list of however many parts result; the P:S shape shown in the
      # usage text is not checked here.
      ctx.symbol_transforms.append(value.split(":"))

  if ctx.print_help:
    usage()
    sys.exit(0)

  # Consistency check for options and arguments.
  if len(args) == 0:
    usage()
    sys.exit(1)

  if len(args) > 1:
    sys.stderr.write(error_prefix +
                     ": must pass only one CVS repository.\n")
    usage()
    sys.exit(1)

  ctx.cvsroot = args[0]

  if not os.path.isdir(ctx.cvsroot):
    sys.stderr.write(error_prefix +
                     ": the given CVS repository path '%s' is not an "
                     "existing directory.\n" % ctx.cvsroot)
    sys.exit(1)

  if ctx.use_cvs:
    # Ascend above the specified root if necessary, to find the cvs_repository
    # (a directory containing a CVSROOT directory) and the cvs_module (the
    # path of the conversion root within the cvs repository)
    # NB: cvs_module must be separated by '/' *not* by os.sep .
    ctx.cvs_repository = os.path.abspath(ctx.cvsroot)
    prev_cvs_repository = None
    ctx.cvs_module = ""
    # The loop ends without 'break' once os.path.split() no longer changes
    # the path; the 'else' clause below handles that case.
    while prev_cvs_repository != ctx.cvs_repository:
      if os.path.isdir(os.path.join(ctx.cvs_repository, 'CVSROOT')):
        break
      prev_cvs_repository = ctx.cvs_repository
      ctx.cvs_repository, module_component = os.path.split(ctx.cvs_repository)
      ctx.cvs_module = module_component + "/" + ctx.cvs_module
    else:
      # Hit the root (of the drive, on Windows) without finding a CVSROOT dir.
      sys.stderr.write(error_prefix +
                       ": the path '%s' is not a CVS repository, nor a path " \
                       "within a CVS repository. A CVS repository contains " \
                       "a CVSROOT directory within its root directory.\n" \
                       % ctx.cvsroot)
      sys.exit(1)
    os.environ['CVSROOT'] = ctx.cvs_repository

  if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
    sys.stderr.write(error_prefix +
                     ": must pass one of '-s' or '--dump-only'.\n")
    sys.exit(1)

  def not_both(opt1val, opt1name, opt2val, opt2name):
    # Report an error and exit if two mutually exclusive options are
    # both set.
    if opt1val and opt2val:
      sys.stderr.write(error_prefix + ": cannot pass both '%s' and '%s'.\n" \
                       % (opt1name, opt2name))
      sys.exit(1)

  not_both(ctx.target, '-s', ctx.dump_only, '--dump-only')

  not_both(ctx.dump_only, '--dump-only',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.dump_only, '--dump-only',
           ctx.bdb_txn_nosync, '--bdb-txn-nosync')

  not_both(ctx.quiet, '-q',
           ctx.verbose, '-v')

  if ((string.find(ctx.trunk_base, '/') > -1)
      or (string.find(ctx.tags_base, '/') > -1)
      or (string.find(ctx.branches_base, '/') > -1)):
    sys.stderr.write("%s: cannot pass multicomponent path to "
                     "--trunk, --tags, or --branches yet.\n"
                     " See http://cvs2svn.tigris.org/issues/show_bug.cgi?"
                     "id=7 for details.\n" % error_prefix)
    sys.exit(1)

  # NOTE(review): with --existing-svnrepos and --dry-run but no -s,
  # ctx.target is still None when it reaches os.path.isdir() -- confirm
  # that combination is handled as intended.
  if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
    sys.stderr.write(error_prefix +
                     ": the svn-repos-path '%s' is not an "
                     "existing directory.\n" % ctx.target)
    sys.exit(1)

  if not ctx.dump_only and not ctx.existing_svnrepos \
     and (not ctx.dry_run) and os.path.exists(ctx.target):
    sys.stderr.write(error_prefix +
                     ": the svn-repos-path '%s' exists.\nRemove it, or pass "
                     "'--existing-svnrepos'.\n" % ctx.target)
    sys.exit(1)

  if ctx.mime_types_file:
    ctx.mime_mapper = MimeMapper()
    ctx.mime_mapper.set_mime_types_file(ctx.mime_types_file)

  # Make sure the tmp directory exists.  Note that we don't check if
  # it's empty -- we want to be able to use, for example, "." to hold
  # tempfiles.  But if we *did* want check if it were empty, we'd do
  # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
  if not os.path.exists(ctx.tmpdir):
    os.mkdir(ctx.tmpdir)
  elif not os.path.isdir(ctx.tmpdir):
    sys.stderr.write(error_prefix +
       ": cvs2svn tried to use '%s' for temporary files, but that path\n"
       " exists and is not a directory. Please make it be a directory,\n"
       " or specify some other directory for temporary files.\n" \
       % ctx.tmpdir)
    sys.exit(1)

  if ctx.use_cvs:
    def cvs_ok():
      # Run 'cvs <global arguments> --version' and return a tuple
      # (ok, exit status, stderr text); ok is true only when stderr is
      # empty and the exit status is 0.
      pipe = Popen3('cvs %s --version' % Ctx().cvs_global_arguments, True)
      pipe.tochild.close()
      pipe.fromchild.read()
      errmsg = pipe.childerr.read()
      status = pipe.wait()
      ok = len(errmsg) == 0 and status == 0
      return (ok, status, errmsg)

    # Try with '-R' first and fall back to plain '-q' if that fails.
    ctx.cvs_global_arguments = "-q -R"
    ok, cvs_exitstatus, cvs_errmsg = cvs_ok()
    if not ok:
      ctx.cvs_global_arguments = "-q"
      ok, cvs_exitstatus, cvs_errmsg = cvs_ok()

    # NOTE(review): the failure is reported, but execution continues
    # without exiting -- confirm that this is intentional.
    if not ok:
      sys.stderr.write(error_prefix +
                       ": error executing CVS: status %s, error output:\n" \
                       % (cvs_exitstatus) + cvs_errmsg)

  # But do lock the tmpdir, to avoid process clash.
  try:
    os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
  except OSError, e:
    if e.errno == errno.EACCES:
      sys.stderr.write(error_prefix + ": Permission denied:"
                       + " No write access to output directory.\n")
      sys.exit(1)
    if e.errno == errno.EEXIST:
      sys.stderr.write(error_prefix +
            ": cvs2svn is using directory '%s' for temporary files, but\n"
            " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
            " cvs2svn process is currently using '%s' as its temporary\n"
            " workspace. If you are certain that is not the case,\n"
            " then remove the '%s/cvs2svn.lock' subdirectory.\n" \
            % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir))
      sys.exit(1)
    # Any other OSError is unexpected: let it propagate.
    raise
  try:
    if profiling:
      import hotshot
      prof = hotshot.Profile('cvs2svn.hotshot')
      prof.runcall(convert, start_pass, end_pass)
      prof.close()
    else:
      convert(start_pass, end_pass)
  finally:
    # Best effort: release the lock, ignoring any failure to remove it.
    try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
    except: pass
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
  main()