* cvs2svn: It's silly (and pointless) to "import string" twice.
[cvs2svn.git] / cvs2svn
blob4400d16cbe8a81cdd37bd178468e807936b6df3c
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
# The quoted text is 21 characters long as written here, so the slice
# [22:-2] yields '' and VERSION is just 'r'.  For a longer text of the
# form "$LastChangedRevision: NNN $" the slice drops the 22-character
# "$LastChangedRevision: " prefix and the trailing " $", leaving "NNN".
# NOTE(review): presumably the longer form is produced by Subversion
# keyword expansion -- confirm.
VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import string
30 import getopt
31 import stat
32 import md5
33 import marshal
34 import errno
35 import popen2
37 # Warnings and errors start with these strings. They are typically
38 # followed by a colon and a space, as in "%s: " ==> "WARNING: ".
39 warning_prefix = "WARNING"
40 error_prefix = "ERROR"
42 # Make sure this Python is recent enough.
43 if sys.hexversion < 0x2000000:
44 sys.stderr.write("'%s: Python 2.0 or higher required, "
45 "see www.python.org.\n" % error_prefix)
46 sys.exit(1)
48 # Pretend we have true booleans on older python versions
49 try:
50 True
51 except:
52 True = 1
53 False = 0
# Minimal, incomplete, version of popen2.Popen3 for those platforms
# for which popen2 does not provide it.
if hasattr(popen2, 'Popen3'):
    Popen3 = popen2.Popen3
else:
    class Popen3:
        """Stand-in built on popen2.popen3() that offers only the
        fromchild, tochild and childerr pipes plus wait()."""

        def __init__(self, cmd, capturestderr):
            """Start CMD, which is either a string or a sequence of words
            that gets joined with single spaces.  CAPTURESTDERR is
            accepted but not consulted; self.childerr is always set."""
            if type(cmd) != str:
                cmd = " ".join(cmd)
            pipes = popen2.popen3(cmd, mode='b')
            self.fromchild, self.tochild, self.childerr = pipes

        def wait(self):
            """Close fromchild, tochild and childerr in that order,
            stopping at the first close() that returns a true value.
            Return that value, or else the result of the last close()."""
            status = self.fromchild.close()
            if not status:
                status = self.tochild.close()
            if not status:
                status = self.childerr.close()
            return status
70 # DBM module selection
72 # 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
73 # so that the dbhash module used by anydbm will use bsddb3.
74 try:
75 import bsddb3
76 sys.modules['bsddb'] = sys.modules['bsddb3']
77 except ImportError:
78 pass
80 # 2. These DBM modules are not good for cvs2svn.
81 import anydbm
82 if (anydbm._defaultmod.__name__ == 'dumbdbm'
83 or anydbm._defaultmod.__name__ == 'dbm'):
84 print 'ERROR: your installation of Python does not contain a suitable'
85 print ' DBM module. This script cannot continue.'
86 print ' to solve: see http://python.org/doc/current/lib/module-anydbm.html'
87 print ' for details.'
88 sys.exit(1)
90 # 3. If we are using the old bsddb185 module, then try prefer gdbm instead.
91 # Unfortunately, gdbm appears not to be trouble free, either.
92 if hasattr(anydbm._defaultmod, 'bsddb') \
93 and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
94 try:
95 gdbm = __import__('gdbm')
96 except ImportError:
97 sys.stderr.write(warning_prefix +
98 ': The version of the bsddb module found '
99 'on your computer has been reported to malfunction on some datasets, '
100 'causing KeyError exceptions. You may wish to upgrade your Python to '
101 'version 2.3 or later.\n')
102 else:
103 anydbm._defaultmod = gdbm
# Revision-number shapes.  (Raw strings; the patterns are unchanged.)
# Two dot-separated numbers, e.g. "1.3".
trunk_rev = re.compile(r'^[0-9]+\.[0-9]+$')
# Digits and dots followed by ".0.<number>", e.g. "1.3.0.2".
branch_tag = re.compile(r'^[0-9.]+\.0\.[0-9]+$')
# Three dot-separated numbers, e.g. "1.1.1".
vendor_tag = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+$')

# This really only matches standard '1.1.1.*'-style vendor revisions.
# One could conceivably have a file whose default branch is 1.1.3 or
# whatever, or was that at some point in time, with vendor revisions
# 1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now (which
# is the only time this regexp gets used), we'd have no basis for
# assuming that the non-standard vendor branch had ever been the
# default branch anyway, so we don't want this to match them anyway.
#
# NOTE(review): the '+' sits outside the second group, so group(2) holds
# only the final digit of the last component -- confirm no caller
# relies on group(2) being the whole number.
vendor_revision = re.compile(r'^(1\.1\.1)\.([0-9])+$')
# If this run's output is a repository, then (in the tmpdir) we use
# a dumpfile of this name for repository loads.
#
# If this run's output is a dumpfile, then this is the default name of
# that dumpfile, but in the current directory (unless the user has
# specified a dumpfile path, of course, in which case it will be
# wherever the user said).
DUMPFILE = 'cvs2svn-dump'

# This file appears with different suffixes at different stages of
# processing.  CVS revisions are cleaned and sorted here, for commit
# grouping.  See design-notes.txt for details.
DATAFILE = 'cvs2svn-data'

# This file contains a marshalled copy of all the statistics that we
# gather throughout the various runs of cvs2svn.  The data is stored as
# a marshalled dictionary.
STATISTICS_FILE = 'cvs2svn-statistics'

# This text file contains records (1 per line) that describe svn
# filesystem paths that are the opening and closing source revisions
# for copies to tags and branches.  The format is as follows:
#
#     SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
#
# Where type is either OPENING or CLOSING.  The SYMBOL_NAME and
# SVN_REVNUM are the primary and secondary sorting criteria for
# creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
# A sorted version of the above file.
SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'

# This file is a temporary file for storing symbolic_name -> closing
# CVSRevision until the end of our pass where we can look up the
# corresponding SVNRevNum for the closing revs and write these out to
# the SYMBOL_OPENINGS_CLOSINGS.
SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'

# Skeleton version of an svn filesystem.
# (These supersede and will eventually replace the two above.)
# See class SVNRepositoryMirror for how these work.
SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'

# Offsets pointing to the beginning of each SYMBOLIC_NAME in
# SYMBOL_OPENINGS_CLOSINGS_SORTED
SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'

# Maps CVSRevision.unique_key()s to lists of symbolic names, where
# the CVSRevision is the last such that is a source for those symbolic
# names.  For example, if branch B's number is 1.3.0.2 in this CVS
# file, and this file's 1.3 is the latest (by date) revision among
# *all* CVS files that is a source for branch B, then the
# CVSRevision.unique_key() corresponding to this file at 1.3 would
# list at least B in its list.
SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'

# Maps CVSRevision.unique_key() to corresponding line in s-revs.
###PERF Or, we could map to an offset into s-revs, instead of dup'ing
### the s-revs data in this database.
CVS_REVS_DB = 'cvs2svn-cvs-revs.db'

# Lists all symbolic names that are tags.  Keys are strings (symbolic
# names), values are ignorable.
TAGS_DB = 'cvs2svn-tags.db'

# A list of all tags.  Each line consists of the tag name and the number
# of files in which it exists, separated by a space.
TAGS_LIST = 'cvs2svn-tags.txt'

# A list of all branches.  The file is stored as a plain text file
# to make it easy to look at in an editor.  Each line contains the
# branch name, the number of files where the branch is created, the
# commit count, and a list of tags and branches that are defined on
# revisions in the branch.
BRANCHES_LIST = 'cvs2svn-branches.txt'

# These two databases provide a bidirectional mapping between
# CVSRevision.unique_key()s and Subversion revision numbers.
#
# The first maps CVSRevision.unique_key() to a number; the values are
# not unique.
#
# The second maps a number to a list of CVSRevision.unique_key()s.
CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'

# This database maps svn_revnums to tuples of (symbolic_name, date).
#
# The svn_revnums are the revision numbers of all non-primary
# SVNCommits.  No primary SVNCommit has a key in this database.
#
# The date is stored for all commits in this database.
#
# For commits that fill symbolic names, the symbolic_name is stored.
# For commits that are default branch syncs, the symbolic_name is None.
SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'

# This database maps svn_revnums of a default branch synchronization
# commit to the svn_revnum of the primary SVNCommit that motivated it.
#
# (NOTE: Secondary commits that fill branches and tags also have a
# motivating commit, but we do not record it because it is (currently)
# not needed for anything.)
#
# This mapping is used when generating the log message for the commit
# that synchronizes the default branch with trunk.
MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'

# How many bytes to read at a time from a pipe.  128 kiB should be
# large enough to be efficient without wasting too much memory.
PIPE_READ_SIZE = 128 * 1024

# Record the default RCS branches, if any, for CVS filepaths.
#
# The keys are CVS filepaths, relative to the top of the repository
# and with the ",v" stripped off, so they match the cvs paths used in
# Commit.commit().  The values are vendor branch revisions, such as
# '1.1.1.1', or '1.1.1.2', or '1.1.1.96'.  The vendor branch revision
# represents the highest vendor branch revision thought to have ever
# been head of the default branch.
#
# The reason we record a specific vendor revision, rather than a
# default branch number, is that there are two cases to handle:
#
# One case is simple.  The RCS file lists a default branch explicitly
# in its header, such as '1.1.1'.  In this case, we know that every
# revision on the vendor branch is to be treated as head of trunk at
# that point in time.
#
# But there's also a degenerate case.  The RCS file does not currently
# have a default branch, yet we can deduce that for some period in the
# past it probably *did* have one.  For example, the file has vendor
# revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
# and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2.  In this
# case, we should record 1.1.1.96 as the last vendor revision to have
# been the head of the default branch.
DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'

# Records the author and log message for each changeset.
# The keys are author+log digests, the same kind used to identify
# unique revisions in the .revs, etc files.  Each value is a tuple
# of two elements: '(author logmessage)'.
METADATA_DB = "cvs2svn-metadata.db"

# Suffixes for the processing stages of DATAFILE.
REVS_SUFFIX = '.revs'
CLEAN_REVS_SUFFIX = '.c-revs'
SORTED_REVS_SUFFIX = '.s-revs'
RESYNC_SUFFIX = '.resync'

SVN_INVALID_REVNUM = -1

COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs

# Things that can happen to a file.
OP_NOOP = '-'
OP_ADD = 'A'
OP_DELETE = 'D'
OP_CHANGE = 'C'

# A deltatext either does or doesn't represent some change.
DELTATEXT_NONEMPTY = 'N'
DELTATEXT_EMPTY = 'E'

# NOTE(review): presumably the index just past the digest in a revs
# line -- 9 characters for the 8-hex-digit timestamp plus a space, then
# two hex characters per digest byte -- confirm against the users of
# this constant.
DIGEST_END_IDX = 9 + (sha.digestsize * 2)

# Constants used in SYMBOL_OPENINGS_CLOSINGS
OPENING = 'O'
CLOSING = 'C'
def temp(basename):
    """Return the path of BASENAME inside the directory Ctx().tmpdir.
    Exists only to keep call sites short."""
    tmpdir = Ctx().tmpdir
    return os.path.join(tmpdir, basename)
293 # Since the unofficial set also includes [/\] we need to translate those
294 # into ones that don't conflict with Subversion limitations.
295 def _clean_symbolic_name(name):
296 """Return symbolic name NAME, translating characters that Subversion
297 does not allow in a pathname."""
298 name = name.replace('/','++')
299 name = name.replace('\\','--')
300 return name
302 def _path_join(*components):
303 """Join two or more pathname COMPONENTS, inserting '/' as needed.
304 Empty component are skipped."""
305 return string.join(filter(None, components), '/')
def run_command(command):
    """Run COMMAND through os.system().  If it returns a nonzero status,
    exit the program with a 'Command failed' message."""
    status = os.system(command)
    if status:
        sys.exit('Command failed: "%s"' % command)
def relative_name(cvsroot, fname):
    """Return FNAME relative to CVSROOT, with os.sep replaced by '/'.

    FNAME must begin with the string CVSROOT.  That prefix is removed,
    together with a single os.sep directly following it, if any.  When
    FNAME is exactly CVSROOT the result is the empty string.

    If FNAME does not begin with CVSROOT, an error is written to stderr
    and the program exits with status 1."""
    l = len(cvsroot)
    if fname[:l] == cvsroot:
        # Slice rather than index: fname[l] would raise IndexError when
        # FNAME is exactly CVSROOT.
        if fname[l:l + 1] == os.sep:
            l = l + 1
        return fname[l:].replace(os.sep, '/')
    sys.stderr.write("%s: relative_name('%s', '%s'): fname is not a sub-path of"
                     " cvsroot\n" % (error_prefix, cvsroot, fname))
    sys.exit(1)
def get_co_pipe(c_rev):
    """Return a command string, and the pipe created using that string.
    C_REV is a CVSRevision.  The pipe returns the text of that CVS
    Revision.

    The command is 'cvs ... co' when Ctx().use_cvs is set and RCS 'co'
    otherwise.  The pipe's input side is closed before returning."""
    ctx = Ctx()
    if not ctx.use_cvs:
        target = escape_shell_arg(c_rev.rcs_path())
        pipe_cmd = 'co -q -x,v -p%s %s' % (c_rev.rev, target)
    else:
        target = escape_shell_arg(ctx.cvs_module + c_rev.cvs_path)
        pipe_cmd = 'cvs %s co -r%s -p %s' % \
                   (ctx.cvs_global_arguments, c_rev.rev, target)
    pipe = Popen3(pipe_cmd, True)
    # Nothing is ever written to the child.
    pipe.tochild.close()
    return pipe_cmd, pipe
def generate_ignores(c_rev):
    """Return the list of ignore patterns found in the text of C_REV.

    The revision text is read through get_co_pipe(); if that command
    exits with a nonzero status the program exits with an error message
    that includes the command's stderr output.  The text is split on
    whitespace into patterns, and a lone '!' discards every pattern
    collected before it."""
    # Read in props
    pipe_cmd, pipe = get_co_pipe(c_rev)
    chunks = [ ]
    while 1:
        buf = pipe.fromchild.read(PIPE_READ_SIZE)
        if not buf:
            break
        chunks.append(buf)
    pipe.fromchild.close()
    error_output = pipe.childerr.read()
    exit_status = pipe.wait()
    if exit_status:
        sys.exit("%s: The command '%s' failed with exit status: %s\n"
                 "and the following output:\n"
                 "%s" % (error_prefix, pipe_cmd, exit_status, error_output))

    # Tweak props: any run of whitespace separates two patterns, so
    # there are never empty entries to skip.
    ignore_vals = [ ]
    for ignore in "".join(chunks).split():
        if ignore == '!':
            # Reset the list if we encounter a '!'
            # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
            ignore_vals = [ ]
        else:
            ignore_vals.append(ignore)
    return ignore_vals
368 # Return a string that has not been returned by gen_key() before.
369 gen_key_base = 0L
370 def gen_key():
371 global gen_key_base
372 key = '%x' % gen_key_base
373 gen_key_base = gen_key_base + 1
374 return key
if sys.platform == "win32":
    def escape_shell_arg(str):
        """Return STR wrapped in double quotes, with each embedded
        double quote replaced by the sequence "^""."""
        escaped = str.replace('"', '"^""')
        return '"' + escaped + '"'
else:
    def escape_shell_arg(str):
        """Return STR wrapped in single quotes, with each embedded
        single quote replaced by the sequence '\\''."""
        escaped = str.replace("'", "'\\''")
        return "'" + escaped + "'"
def format_date(date):
    """Return an svn-compatible date string for DATE (seconds since
    epoch), expressed in UTC with the microseconds fixed at zero."""
    # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
    utc = time.gmtime(date)
    return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc)
def sort_file(infile, outfile):
    """Sort the lines of INFILE into OUTFILE with the external 'sort'
    program, using Ctx().tmpdir as its temporary directory (-T).

    LC_ALL is forced to 'C' for the duration of the sort and put back
    to its previous state afterwards (removed again if it was not set),
    even when the sort command fails and run_command() exits."""
    # sort the log files

    # GNU sort will sort our dates differently (incorrectly!) if our
    # LC_ALL is anything but 'C', so temporarily set it to 'C'.
    lc_all_tmp = os.environ.get('LC_ALL')
    os.environ['LC_ALL'] = 'C'
    try:
        # The -T option to sort has a nice side effect.  The Win32 sort is
        # case insensitive and cannot be used, and since it does not
        # understand the -T option and dies if we try to use it, there is
        # no risk that we use that sort by accident.
        run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
    finally:
        if lc_all_tmp is None:
            del os.environ['LC_ALL']
        else:
            os.environ['LC_ALL'] = lc_all_tmp
409 def print_node_tree(tree, root_node, indent_depth=0):
410 """For debugging purposes. Prints all nodes in TREE that are
411 rooted at ROOT_NODE. INDENT_DEPTH is merely for purposes of
412 debugging with the print statement in this function."""
413 if not indent_depth:
414 print "TREE", "=" * 75
415 print "TREE:", " " * (indent_depth * 2), root_node, tree[root_node]
416 for key, value in tree[root_node].items():
417 if key[0] == '/': #Skip flags
418 continue
419 print_node_tree(tree, value, (indent_depth + 1))
def match_regexp_list(regexp_list, string):
    """Return 1 if STRING matches (from its start) any of the compiled
    regexps in REGEXP_LIST, else return None."""
    for pattern in regexp_list:
        if pattern.match(string):
            return 1
    return None
# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2

class Log:
    """A Simple logging facility.  Each line will be timestamped if
    self.use_timestamps is TRUE.  This class is a Borg, see
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

    # State shared by every instance (the Borg pattern).
    __shared_state = {}

    def __init__(self):
        self.__dict__ = self.__shared_state
        if self.__dict__:
            # Already initialized through an earlier instance.
            return
        self.log_level = LOG_NORMAL
        # Set this to true if you want to see timestamps on each line output.
        self.use_timestamps = None
        self.logger = sys.stdout

    def _timestamp(self):
        """Output a detailed timestamp at the beginning of each line output."""
        # %M is the minute; %m would repeat the month in the time field.
        self.logger.write(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))

    def write(self, log_level, *args):
        """This is the public method to use for writing to a file.  Only
        messages whose LOG_LEVEL is <= self.log_level will be printed.  If
        there are multiple ARGS, they will be separated by a space."""
        if log_level > self.log_level:
            return
        if self.use_timestamps:
            self._timestamp()
        self.logger.write(' '.join(map(str, args)) + "\n")
        # Ensure that log output doesn't get out-of-order with respect to
        # stderr output.
        self.logger.flush()
class Cleanup:
    """This singleton class manages any files created by cvs2svn.  When
    you first create a file, call Cleanup.register, passing the
    filename, and the last pass that you need the file.  After the end
    of that pass, your file will be cleaned up after running an optional
    callback.  This class is a Borg, see
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

    # State shared by every instance (the Borg pattern).
    __shared_state = {}

    def __init__(self):
        self.__dict__ = self.__shared_state
        if self.__dict__:
            # Already initialized through an earlier instance.
            return
        # Maps a pass to a dictionary whose keys are the registered files.
        self._log = {}
        # Maps a file to the callback to run before removing it.
        self._callbacks = {}

    def register(self, file, which_pass, callback=None):
        """Register FILE for cleanup at the end of WHICH_PASS, running
        function CALLBACK prior to removal.  Registering a given FILE is
        idempotent; you may register as many times as you wish, but it
        will only be cleaned up once.

        Note that if a file is registered multiple times, only the first
        callback registered for that file will be called at cleanup
        time.  Also note that if you register a database file you must
        close the database before cleanup, e.g. using a callback."""
        self._log.setdefault(which_pass, {})[file] = 1
        if callback:
            # setdefault() keeps an earlier callback for FILE, if any.
            self._callbacks.setdefault(file, callback)

    def cleanup(self, which_pass):
        """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
        files = self._log.get(which_pass)
        if files is None:
            return
        for file in files.keys():
            Log().write(LOG_VERBOSE, "Deleting", file)
            callback = self._callbacks.get(file)
            if callback is not None:
                callback()
            os.unlink(file)
# Always use these constants for opening databases.
DB_OPEN_READ = 'r'
DB_OPEN_NEW = 'n'

class Database:
    """A wrapper for anydbm that uses the marshal module to store items
    as strings."""

    def __init__(self, filename, mode):
        """Open the database FILENAME in MODE (as for anydbm.open())."""
        # pybsddb3 has a bug which prevents it from working with
        # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
        # causes the DB_TRUNCATE flag to be passed, which is disallowed
        # for databases protected by lock and transaction support
        # (bsddb databases use locking from bsddb version 4.2.4 onwards).
        #
        # Therefore, manually perform the removal (we can do this, because
        # we know that for bsddb - but *not* anydbm in general - the database
        # consists of one file with the name we specify, rather than several
        # based on that name).
        uses_dbhash = anydbm._defaultmod.__name__ == 'dbhash'
        if uses_dbhash and mode == 'n':
            if os.path.isfile(filename):
                os.unlink(filename)
            mode = 'c'

        self.db = anydbm.open(filename, mode)

    def has_key(self, key):
        """Return true if KEY is present in the database."""
        return self.db.has_key(key)

    def __getitem__(self, key):
        """Return the unmarshalled value stored under KEY."""
        return marshal.loads(self.db[key])

    def __setitem__(self, key, value):
        """Store the marshalled form of VALUE under KEY."""
        self.db[key] = marshal.dumps(value)

    def __delitem__(self, key):
        """Remove KEY from the database."""
        del self.db[key]

    def get(self, key, default):
        """Return the value stored under KEY, or DEFAULT if KEY is
        absent."""
        if not self.has_key(key):
            return default
        return self[key]
551 class StatsKeeper:
552 __shared_state = { }
553 def __init__(self):
554 self.__dict__ = self.__shared_state
555 if self.__dict__:
556 return
557 self.filename = temp(STATISTICS_FILE)
558 Cleanup().register(self.filename, pass8)
559 # This can get kinda large, so we don't store it in our data dict.
560 self.repos_files = { }
562 if os.path.exists(self.filename):
563 self.unarchive()
564 else:
565 self.data = { 'cvs_revs_count' : 0,
566 'tags': { },
567 'branches' : { },
568 'repos_size' : 0,
569 'repos_file_count' : 0,
570 'svn_rev_count' : None,
571 'first_rev_date' : 1L<<32,
572 'last_rev_date' : 0,
573 'pass_timings' : { },
574 'start_time' : 0,
575 'end_time' : 0,
578 def log_duration_for_pass(self, duration, pass_num):
579 self.data['pass_timings'][pass_num] = duration
581 def set_start_time(self, start):
582 self.data['start_time'] = start
584 def set_end_time(self, end):
585 self.data['end_time'] = end
587 def _bump_item(self, key, amount=1):
588 self.data[key] = self.data[key] + amount
590 def reset_c_rev_info(self):
591 self.data['cvs_revs_count'] = 0
592 self.data['tags'] = { }
593 self.data['branches'] = { }
595 def record_c_rev(self, c_rev):
596 self._bump_item('cvs_revs_count')
598 for tag in c_rev.tags:
599 self.data['tags'][tag] = None
600 for branch in c_rev.branches:
601 self.data['branches'][branch] = None
603 if c_rev.timestamp < self.data['first_rev_date']:
604 self.data['first_rev_date'] = c_rev.timestamp
606 if c_rev.timestamp > self.data['last_rev_date']:
607 self.data['last_rev_date'] = c_rev.timestamp
609 # Only add the size if this is the first time we see the file.
610 if not self.repos_files.has_key(c_rev.fname):
611 self._bump_item('repos_size', c_rev.file_size)
612 self.repos_files[c_rev.fname] = None
614 self.data['repos_file_count'] = len(self.repos_files)
616 def set_svn_rev_count(self, count):
617 self.data['svn_rev_count'] = count
619 def svn_rev_count(self):
620 return self.data['svn_rev_count']
622 def archive(self):
623 open(self.filename, 'w').write(marshal.dumps(self.data))
625 def unarchive(self):
626 self.data = marshal.loads(open(self.filename, 'r').read())
628 def __str__(self):
629 svn_revs_str = ""
630 if self.data['svn_rev_count'] is not None:
631 svn_revs_str = ('Total SVN Commits: %10s\n'
632 % self.data['svn_rev_count'])
634 return ('\n' \
635 'cvs2svn Statistics:\n' \
636 '------------------\n' \
637 'Total CVS Files: %10i\n' \
638 'Total CVS Revisions: %10i\n' \
639 'Total Unique Tags: %10i\n' \
640 'Total Unique Branches: %10i\n' \
641 'CVS Repos Size in KB: %10i\n' \
642 '%s' \
643 'First Revision Date: %s\n' \
644 'Last Revision Date: %s\n' \
645 '------------------' \
646 % (self.data['repos_file_count'],
647 self.data['cvs_revs_count'],
648 len(self.data['tags']),
649 len(self.data['branches']),
650 (self.data['repos_size'] / 1024),
651 svn_revs_str,
652 time.ctime(self.data['first_rev_date']),
653 time.ctime(self.data['last_rev_date']),
656 def timings(self):
657 passes = self.data['pass_timings'].keys()
658 passes.sort()
659 str = 'Timings:\n------------------\n'
661 def desc(val):
662 if val == 1: return "second"
663 return "seconds"
665 for pass_num in passes:
666 duration = int(self.data['pass_timings'][pass_num])
667 p_str = ('pass %d:%6d %s\n'
668 % (pass_num, duration, desc(duration)))
669 str = str + p_str
671 total = int(self.data['end_time'] - self.data['start_time'])
672 str = str + ('total: %6d %s' % (total, desc(total)))
673 return str
class LastSymbolicNameDatabase:
    """Passing every CVSRevision in s-revs to this class will result in
    a Database whose key is the last CVS Revision a symbolicname was
    seen in, and whose value is a list of all symbolicnames that were
    last seen in that revision."""

    def __init__(self, mode):
        """Open the SYMBOL_LAST_CVS_REVS_DB database in MODE and
        register it for cleanup after pass5."""
        # Maps a symbolic name to the unique_key() of the last
        # CVSRevision logged for it.
        self.symbols = {}
        self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
        Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

    # Once we've gone through all the revs,
    # symbols.keys() will be a list of all tags and branches, and
    # their corresponding values will be a key into the last CVS revision
    # that they were used in.
    def log_revision(self, c_rev):
        """Record C_REV as the latest revision seen for each of its tags
        and, unless C_REV is a delete, for each of its branches."""
        # Gather last CVS Revision for symbolic name info and tag info
        for tag in c_rev.tags:
            self.symbols[tag] = c_rev.unique_key()
        # Compare by value: the op is a string, and an identity test
        # only works if both strings happen to be the same object.
        if c_rev.op != OP_DELETE:
            for branch in c_rev.branches:
                self.symbols[branch] = c_rev.unique_key()

    # Creates an inversion of symbols above--a dictionary of lists (key
    # = CVS rev unique_key: val = list of symbols that close in that
    # rev.
    def create_database(self):
        """Write the inverse of self.symbols to the database."""
        for sym, rev_unique_key in self.symbols.items():
            # The list is fetched, extended and stored back, because
            # changing the fetched copy does not alter a Database.
            ary = self.symbol_revs_db.get(rev_unique_key, [])
            ary.append(sym)
            self.symbol_revs_db[rev_unique_key] = ary
class CVSRevisionDatabase:
    """A Database to store CVSRevision objects and retrieve them by their
    unique_key().  Each revision is stored as its str() form."""

    def __init__(self, mode):
        """Initialize an instance, opening database in MODE (like the MODE
        argument to Database or anydbm.open())."""
        self.cvs_revs_db = Database(temp(CVS_REVS_DB), mode)
        Cleanup().register(temp(CVS_REVS_DB), pass8)

    def log_revision(self, c_rev):
        """Add C_REV, a CVSRevision, to the database."""
        line = str(c_rev)
        self.cvs_revs_db[c_rev.unique_key()] = line

    def get_revision(self, unique_key):
        """Return the CVSRevision stored under UNIQUE_KEY."""
        line = self.cvs_revs_db[unique_key]
        return CVSRevision(Ctx(), line)
class TagsDatabase(Database):
    """A Database to store which symbolic names are tags.
    Each key is a tag name.
    The value has no meaning, and should be set to None."""

    def __init__(self, mode):
        """Open the TAGS_DB database in MODE and register it for
        cleanup after pass8."""
        path = temp(TAGS_DB)
        Database.__init__(self, path, mode)
        Cleanup().register(path, pass8)
739 class CVSRevision:
def __init__(self, ctx, *args):
    """Initialize a new CVSRevision with Ctx object CTX, and ARGS.

    If CTX is None, the following members and methods of the
    instantiated CVSRevision class object will be unavailable (or
    simply will not work correctly, if at all):
       cvs_path
       svn_path
       svn_trunk_path
       is_default_branch_revision()

    (Note that this class treats CTX as const, because the caller
    likely passed in a Borg instance of a Ctx.  The reason this class
    takes CTX as a parameter, instead of just instantiating a Ctx
    itself, is that this class should be usable outside cvs2svn.)

    If there is one argument in ARGS, it is a string, in the format of
    a line from a revs file.  Do *not* include a trailing newline.

    If there are multiple ARGS, there must be 16 of them,
    comprising a parsed revs line, in this order:
       timestamp       -->  (int) date stamp for this cvs revision
       digest          -->  (string) digest of author+logmsg
       prev_timestamp  -->  (int) date stamp for the previous cvs revision
       op              -->  (char) OP_ADD, OP_CHANGE, or OP_DELETE
       prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
       rev             -->  (string) this CVS rev, e.g., "1.3"
       next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
       file_in_attic   -->  (char or None) true if RCS file is in Attic
       file_executable -->  (char or None) true if RCS file has exec bit set.
       file_size       -->  (int) size of the RCS file
       deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
       fname           -->  (string) relative path of file in CVS repos
       mode            -->  (string or None) "kkv", "kb", etc.
       branch_name     -->  (string or None) branch on which this rev occurred
       tags            -->  (list of strings) all tags on this revision
       branches        -->  (list of strings) all branches rooted in this rev

    (In a revs line, by contrast, the file name comes last, after the
    tags and branches.)

    The two forms of initialization are equivalent.

    Raise TypeError if ARGS has neither 1 nor 16 elements."""

    def star_to_none(field):
        # A "*" field in a revs line stands for "no value".
        if field == "*":
            return None
        return field

    self._ctx = ctx
    if len(args) == 16:
        (self.timestamp, self.digest, self.prev_timestamp, self.op,
         self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
         self.file_executable, self.file_size, self.deltatext_code,
         self.fname,
         self.mode, self.branch_name, self.tags, self.branches) = args
    elif len(args) == 1:
        # The first 14 fields are single words; the 15th holds the
        # tags, the branch count, the branches and the file name.
        data = args[0].split(' ', 14)
        # The timestamp is hexadecimal, the previous timestamp decimal.
        self.timestamp = int(data[0], 16)
        self.digest = data[1]
        if data[2] == "*":
            self.prev_timestamp = 0
        else:
            self.prev_timestamp = int(data[2])
        self.op = data[3]
        self.prev_rev = star_to_none(data[4])
        self.rev = data[5]
        self.next_rev = star_to_none(data[6])
        self.file_in_attic = star_to_none(data[7])
        self.file_executable = star_to_none(data[8])
        self.file_size = int(data[9])
        self.deltatext_code = data[10]
        self.mode = star_to_none(data[11])
        self.branch_name = star_to_none(data[12])
        # Peel off NTAGS tags, then the branch count, then that many
        # branches; whatever remains (spaces included) is the file name.
        ntags = int(data[13])
        tags = data[14].split(' ', ntags + 1)
        nbranches = int(tags[ntags])
        branches = tags[ntags + 1].split(' ', nbranches)
        self.fname = branches[nbranches]
        self.tags = tags[:ntags]
        self.branches = branches[:nbranches]
    else:
        # The counts in the message include CTX but not self.
        raise TypeError('CVSRevision() takes 2 or 17 arguments (%d given)'
                        % (len(args) + 1))
    if ctx is not None:
        # fname[:-2] drops the last two characters of the file name.
        # NOTE(review): presumably the ",v" suffix -- confirm.
        self.cvs_path = relative_name(self._ctx.cvsroot, self.fname[:-2])
        self.svn_path = self._make_path(self.cvs_path, self.branch_name)
        self.svn_trunk_path = self._make_path(self.cvs_path)
# The 'primary key' of a CVS Revision is the revision number + the
# filename.  To provide a unique key (say, for a dict), we just glom
# them together in a string.
def unique_key(self, revnum=None):
    """Return REVNUM (by default this revision's number), a '/' and the
    file name, joined into one string.  By passing in self.prev_rev or
    self.next_rev, you can get the unique key for their respective
    CVSRevisions."""
    if revnum is not None:
        return revnum + "/" + self.fname
    return self.rev + "/" + self.fname
def __str__(self):
    """Return this revision as a revs-file line (no trailing newline).
    Fields that are empty or None are written as "*"; the tags and
    branches are each preceded by their count; the file name is last."""
    tags = self.tags
    branches = self.branches
    # A separating space is written only when there is something to
    # separate from the count.
    tag_sep = tags and " " or ""
    branch_sep = branches and " " or ""
    fields = (
        self.timestamp, self.digest, self.prev_timestamp or "*", self.op,
        self.prev_rev or "*", self.rev, self.next_rev or "*",
        self.file_in_attic or "*", self.file_executable or "*",
        self.file_size,
        self.deltatext_code, self.mode or "*", self.branch_name or "*",
        len(tags), tag_sep, " ".join(tags),
        len(branches), branch_sep, " ".join(branches),
        self.fname,
        )
    return '%08lx %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s' % fields
def opens_symbolic_name(self, name):
    """Return 1 if this CVSRevision is the opening CVSRevision for
    NAME (for this RCS file), else return 0."""
    if name in self.tags:
        return 1
    if name not in self.branches:
        return 0
    # If this c_rev opens a branch and our op is OP_DELETE, then
    # that means that the file that this c_rev belongs to was
    # created on the branch, so for all intents and purposes, this
    # c_rev is *technically* not an opening.  See Issue #62 for more
    # information.
    if self.op == OP_DELETE:
        return 0
    return 1
def is_default_branch_revision(self):
  """Return 1 if SELF.rev of SELF.cvs_path is a default branch
  revision according to DEFAULT_BRANCHES_DB (see the conditions
  documented there), else return None."""
  if not self._ctx._default_branches_db.has_key(self.cvs_path):
    return None

  recorded = self._ctx._default_branches_db[self.cvs_path]
  recorded_dot = recorded.rindex(".")
  own_dot = self.rev.rindex(".")

  # Everything before the last dot is the branch number; the part
  # after it is the revision's position on that branch.
  recorded_branch = recorded[:recorded_dot]
  own_branch = self.rev[:own_dot]
  recorded_component = int(recorded[recorded_dot + 1:])
  own_component = int(self.rev[own_dot + 1:])

  same_branch = (recorded_branch == own_branch)
  if same_branch and own_component <= recorded_component:
    return 1
  return None
def _make_path(self, path, branch_name = None):
  """Return the Subversion path for PATH: under the branches base if
  BRANCH_NAME is given (and non-empty), otherwise under the trunk base.

  If PATH is None, return None."""
  # For a while, we treated each top-level subdir of the CVS
  # repository as a "project root" and interpolated the appropriate
  # genealogy (trunk|tag|branch) in according to the official
  # recommended layout.  For example, the path '/foo/bar/baz.c' on
  # branch 'Rel2' would become
  #
  #   /foo/branches/Rel2/bar/baz.c
  #
  # and on trunk it would become
  #
  #   /foo/trunk/bar/baz.c
  #
  # However, we went back to the older and simpler method of just
  # prepending the genealogy to the front, instead of interpolating.
  # So now we produce:
  #
  #   /branches/Rel2/foo/bar/baz.c
  #   /trunk/foo/bar/baz.c
  #
  # Why?  Well, Jack Repenning pointed out that this way is much
  # friendlier to "anonymously rooted subtrees" (that's a tree where
  # the name of the top level dir doesn't matter, the point is that if
  # you cd into it and, say, run 'make', something good will happen).
  # By interpolating, we made it impossible to point cvs2svn at some
  # subdir in the CVS repository and convert it as a project, because
  # we'd treat every subdir underneath it as an independent project
  # root, which is probably not what the user wanted.
  #
  # Also, see Blair Zajac's post
  #
  #   http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=38965
  #
  # and the surrounding thread, for why what people really want is a
  # way of specifying an in-repository prefix path, not interpolation.
  if path is None:
    return None

  if not branch_name:
    return self._ctx.trunk_base + '/' + path

  cleaned = _clean_symbolic_name(branch_name)
  return self._ctx.branches_base + '/' + cleaned + '/' + path
def rcs_path(self):
  """Return the actual filesystem path to the RCS file of this
  CVSRevision.

  SELF.fname is returned unchanged when SELF.file_in_attic is None;
  otherwise an 'Attic' component is inserted in front of the last
  path component."""
  if self.file_in_attic is not None:
    directory, leaf = os.path.split(self.fname)
    return os.path.join(directory, 'Attic', leaf)
  return self.fname
def filename(self):
  "Return the last path component of self.fname, minus the ',v'"
  leaf = os.path.basename(self.fname)
  # Drop the trailing two characters (the ',v' suffix).
  return leaf[:-2]
class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""
  def __init__(self):
    # A hash that maps tag names to the number of times the tag was
    # registered as created (see register_tag_creation).
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    if name not in self.tags:
      self.tags[name] = 0
    self.tags[name] += 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    if name not in self.branches:
      self.branches[name] = [ 0, 0, { } ]
    return self.branches[name]

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Returns a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags.keys():
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches.keys():
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """Return a hash (used as a set) of the blockers of BRANCH that
    are not themselves in the hash EXCLUDES.  The result is empty
    unless BRANCH itself is in EXCLUDES."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so
    the values are not used."""
    blocked_branches = { }
    for branch in self.branches.keys():
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches.keys():
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files."""
    # Each line of the tags file is "NAME COUNT".
    f = open(temp(TAGS_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    # Each line of the branches file is
    # "NAME CREATE_COUNT COMMIT_COUNT [BLOCKER ...]".
    f = open(temp(BRANCHES_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        words = line.split()
        self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
        for blocker in words[3:]:
          self.branches[words[0]][2][blocker] = None
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files."""
    f = open(temp(TAGS_LIST), "w")
    try:
      Cleanup().register(temp(TAGS_LIST), pass2)
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      # Close explicitly so the data is flushed even on interpreters
      # that do not finalize the file object immediately.
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    try:
      Cleanup().register(temp(BRANCHES_LIST), pass2)
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()
class CollectData(cvs2svn_rcsparse.Sink):
  """rcsparse Sink that collects revision and symbol data, one RCS
  file at a time (see set_fname), and records it in the revs and
  resync files, the default-branches and metadata databases, and a
  SymbolDatabase."""
  def __init__(self):
    self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
    Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
    self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
    Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
    self.default_branches_db = Database(temp(DEFAULT_BRANCHES_DB), DB_OPEN_NEW)
    Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
    Cleanup().register(temp(METADATA_DB), pass8)
    self.fatal_errors = []
    self.num_files = 0
    self.symbol_db = SymbolDatabase()

    # 1 if we've collected data for at least one file, None otherwise.
    self.found_valid_file = None

    # See set_fname() for initializations of other variables.

  def set_fname(self, canonical_name, filename):
    """Prepare to receive data for FILENAME.  FILENAME is the absolute
    filesystem path to the file in question, and CANONICAL_NAME is
    FILENAME with the 'Attic' component removed (if the file is indeed
    in the Attic) ."""
    self.fname = canonical_name

    # We calculate and save some file metadata here, where we can do
    # it only once per file, instead of waiting until later where we
    # would have to do the same calculations once per CVS *revision*.

    self.rel_name = relative_name(Ctx().cvsroot, self.fname)[:-2]

    # If the paths are not the same, then that means that the
    # canonical_name has had the 'Attic' component stripped out.
    self.file_in_attic = None
    if canonical_name != filename:
      self.file_in_attic = 1

    file_stat = os.stat(filename)
    # The size of our file in bytes
    self.file_size = file_stat[stat.ST_SIZE]

    # Whether or not the executable bit is set.
    self.file_executable = None
    if file_stat[stat.ST_MODE] & stat.S_IXUSR:
      self.file_executable = 1

    # revision -> [timestamp, author, old-timestamp]
    self.rev_data = { }

    # Maps revision number (key) to the revision number of the
    # previous revision along this line of development.
    #
    # For the first revision R on a branch, we consider the revision
    # from which R sprouted to be the 'previous'.
    #
    # Note that this revision can't be determined arithmetically (due
    # to cvsadmin -o, which is why this is necessary).
    self.prev_rev = { }

    # This dict is essentially self.prev_rev with the values mapped in
    # the other direction, so following key -> value will yield you
    # the next revision number
    self.next_rev = { }

    # Track the state of each revision so that in set_revision_info,
    # we can determine if our op is an add/change/delete.  We can do
    # this because in set_revision_info, we'll have all of the
    # revisions for a file at our fingertips, and we need to examine
    # the state of our prev_rev to determine if we're an add or a
    # change--without the state of the prev_rev, we are unable to
    # distinguish between an add and a change.
    self.rev_state = { }

    # Hash mapping branch numbers, like '1.7.2', to branch names,
    # like 'Release_1_0_dev'.
    self.branch_names = { }

    # RCS flags (used for keyword expansion).
    self.mode = None

    # Hash mapping revision numbers, like '1.7', to lists of names
    # indicating which branches sprout from that revision, like
    # ['Release_1_0_dev', 'experimental_driver', ...].
    self.branchlist = { }

    # Like self.branchlist, but the values are lists of tag names that
    # apply to the key revision.
    self.taglist = { }

    # If set, this is an RCS branch number -- rcsparse calls this the
    # "principal branch", but CVS and RCS refer to it as the "default
    # branch", so that's what we call it, even though the rcsparse API
    # setter method is still 'set_principal_branch'.
    self.default_branch = None

    # If the RCS file doesn't have a default branch anymore, but does
    # have vendor revisions, then we make an educated guess that those
    # revisions *were* the head of the default branch up until the
    # commit of 1.2, at which point the file's default branch became
    # trunk.  This records the date at which 1.2 was committed.
    self.first_non_vendor_revision_date = None

    # A list of all symbols defined for the current file.  Used to
    # prevent multiple definitions of a symbol, something which can
    # easily happen when --symbol-transform is used.
    self.defined_symbols = [ ]

  def set_principal_branch(self, branch):
    """Record BRANCH as the default branch of the current file."""
    self.default_branch = branch

  def set_expansion(self, mode):
    """Record MODE as the RCS keyword expansion mode of the current
    file."""
    self.mode = mode

  def set_branch_name(self, branch_number, name):
    """Record that BRANCH_NUMBER is the branch number for branch NAME,
    and that NAME sprouts from BRANCH_NUMBER .
    BRANCH_NUMBER is an RCS branch number with an odd number of components,
    for example '1.7.2' (never '1.7.0.2')."""
    if branch_number not in self.branch_names:
      self.branch_names[branch_number] = name
      # The branchlist is keyed on the revision number from which the
      # branch sprouts, so strip off the odd final component.
      sprout_rev = branch_number[:branch_number.rfind(".")]
      if sprout_rev not in self.branchlist:
        self.branchlist[sprout_rev] = []
      self.branchlist[sprout_rev].append(name)
      self.symbol_db.register_branch_creation(name)
    else:
      sys.stderr.write("%s: in '%s':\n"
                       " branch '%s' already has name '%s',\n"
                       " cannot also have name '%s', ignoring the latter\n"
                       % (warning_prefix, self.fname, branch_number,
                          self.branch_names[branch_number], name))

  def rev_to_branch_name(self, revision):
    """Return the name of the branch on which REVISION lies.
    REVISION is a non-branch revision number with an even number of,
    components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
    For the convenience of callers, REVISION can also be a trunk
    revision such as '1.2', in which case just return None."""
    if trunk_rev.match(revision):
      return None
    return self.branch_names.get(revision[:revision.rindex(".")])

  def add_cvs_branch(self, revision, branch_name):
    """Record the root revision and branch revision for BRANCH_NAME,
    based on REVISION.  REVISION is a CVS branch number having an even
    number of components where the second-to-last is '0'.  For
    example, if it's '1.7.0.2', then record that BRANCH_NAME sprouts
    from 1.7 and has branch number 1.7.2."""
    last_dot = revision.rfind(".")
    branch_rev = revision[:last_dot]
    last2_dot = branch_rev.rfind(".")
    branch_rev = branch_rev[:last2_dot] + revision[last_dot:]
    self.set_branch_name(branch_rev, branch_name)

  def define_tag(self, name, revision):
    """Record a bidirectional mapping between symbolic NAME and REVISION.
    REVISION is an unprocessed revision number from the RCS file's
    header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
    This function will determine what kind of symbolic name it is by
    inspection, and record it in the right places."""
    for (pattern, replacement) in Ctx().symbol_transforms:
      newname = re.sub(pattern, replacement, name)
      if newname != name:
        Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
                    % (name, newname))
        name = newname
    if name in self.defined_symbols:
      # The duplicate is reported and remembered as a fatal error, but
      # processing of the symbol continues below.
      err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
            % (error_prefix, name, self.fname)
      sys.stderr.write(err + "\n")
      self.fatal_errors.append(err)
    self.defined_symbols.append(name)
    if branch_tag.match(revision):
      self.add_cvs_branch(revision, name)
    elif vendor_tag.match(revision):
      self.set_branch_name(revision, name)
    else:
      if revision not in self.taglist:
        self.taglist[revision] = []
      self.taglist[revision].append(name)
      self.symbol_db.register_tag_creation(name)

  def define_revision(self, revision, timestamp, author, state,
                      branches, next):
    """Record the state, timestamp and author of REVISION, link it to
    its neighbouring revisions (NEXT and BRANCHES), and update the
    default-branch and branch-commit bookkeeping."""

    # Record the state of our revision for later calculations
    self.rev_state[revision] = state

    # store the rev_data as a list in case we have to jigger the timestamp
    self.rev_data[revision] = [int(timestamp), author, None]

    # When on trunk, the RCS 'next' revision number points to what
    # humans might consider to be the 'previous' revision number.  For
    # example, 1.3's RCS 'next' is 1.2.
    #
    # However, on a branch, the RCS 'next' revision number really does
    # point to what humans would consider to be the 'next' revision
    # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
    #
    # In other words, in RCS, 'next' always means "where to find the next
    # deltatext that you need this revision to retrieve.
    #
    # That said, we don't *want* RCS's behavior here, so we determine
    # whether we're on trunk or a branch and set self.prev_rev
    # accordingly.
    #
    # One last thing.  Note that if REVISION is a branch revision,
    # instead of mapping REVISION to NEXT, we instead map NEXT to
    # REVISION.  Since we loop over all revisions in the file before
    # doing anything with the data we gather here, this 'reverse
    # assignment' effectively does the following:
    #
    # 1. Gives us no 'prev' value for REVISION (in this
    # iteration... it may have been set in a previous iteration)
    #
    # 2. Sets the 'prev' value for the revision with number NEXT to
    # REVISION.  So when we come around to the branch revision whose
    # revision value is NEXT, its 'prev' and 'prev_rev' are already
    # set.
    if trunk_rev.match(revision):
      self.prev_rev[revision] = next
      self.next_rev[next] = revision
    elif next:
      self.prev_rev[next] = revision
      self.next_rev[revision] = next

    for b in branches:
      self.prev_rev[b] = revision

    # Ratchet up the highest vendor head revision, if necessary.
    if self.default_branch:
      default_branch_root = self.default_branch + "."
      if (revision.startswith(default_branch_root)
          and (default_branch_root.count('.') == revision.count('.'))):
        # This revision is on the default branch, so record that it is
        # the new highest default branch head revision.
        self.default_branches_db[self.rel_name] = revision
    else:
      # No default branch, so make an educated guess.
      if revision == '1.2':
        # This is probably the time when the file stopped having a
        # default branch, so make a note of it.
        self.first_non_vendor_revision_date = timestamp
      else:
        m = vendor_revision.match(revision)
        if m and ((not self.first_non_vendor_revision_date)
                  or (timestamp < self.first_non_vendor_revision_date)):
          # We're looking at a vendor revision, and it wasn't
          # committed after this file lost its default branch, so bump
          # the maximum trunk vendor revision in the permanent record.
          self.default_branches_db[self.rel_name] = revision

    if not trunk_rev.match(revision):
      # Check for unlabeled branches, record them.  We tried to collect
      # all branch names when we parsed the symbolic name header
      # earlier, of course, but that didn't catch unlabeled branches.
      # If a branch is unlabeled, this is our first encounter with it,
      # so we have to record its data now.
      branch_number = revision[:revision.rindex(".")]
      if branch_number not in self.branch_names:
        branch_name = "unlabeled-" + branch_number
        self.set_branch_name(branch_number, branch_name)

      # Register the commit on this non-trunk branch
      branch_name = self.branch_names[branch_number]
      self.symbol_db.register_branch_commit(branch_name)

  def tree_completed(self):
    "The revision tree has been parsed. Analyze it for consistency."

    # Our algorithm depends upon the timestamps on the revisions occuring
    # monotonically over time.  That is, we want to see rev 1.34 occur in
    # time before rev 1.35.  If we inserted 1.35 *first* (due to the time-
    # sorting), and then tried to insert 1.34, we'd be screwed.

    # to perform the analysis, we'll simply visit all of the 'previous'
    # links that we have recorded and validate that the timestamp on the
    # previous revision is before the specified revision

    # if we have to resync some nodes, then we restart the scan.  just keep
    # looping as long as we need to restart.
    while 1:
      for current, prev in self.prev_rev.items():
        if not prev:
          # no previous revision exists (i.e. the initial revision)
          continue
        t_c = self.rev_data[current][0]
        t_p = self.rev_data[prev][0]
        if t_p >= t_c:
          # the previous revision occurred later than the current revision.
          # shove the previous revision back in time (and any before it that
          # may need to shift).

          # We sync backwards and not forwards because any given CVS
          # Revision has only one previous revision.  However, a CVS
          # Revision can *be* a previous revision for many other
          # revisions (e.g., a revision that is the source of multiple
          # branches).  This becomes relevant when we do the secondary
          # synchronization in pass 2--we can make certain that we
          # don't resync a revision earlier than it's previous
          # revision, but it would be non-trivial to make sure that we
          # don't resync revision R *after* any revisions that have R
          # as a previous revision.
          while t_p >= t_c:
            self.rev_data[prev][0] = t_c - 1    # new timestamp
            self.rev_data[prev][2] = t_p        # old timestamp
            delta = t_c - 1 - t_p
            msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
                  % (self.rel_name,
                     prev, time.ctime(t_p), delta)
            Log().write(LOG_VERBOSE, msg)
            if (delta > COMMIT_THRESHOLD
                or delta < (COMMIT_THRESHOLD * -1)):
              # Named 'fmt' rather than 'str' so that the builtin is
              # not shadowed inside this method.
              fmt = "%s: Significant timestamp change for '%s' (%d seconds)"
              Log().write(LOG_WARN, fmt % (warning_prefix, self.rel_name,
                                           delta))
            current = prev
            prev = self.prev_rev[current]
            if not prev:
              break
            t_c = t_c - 1               # self.rev_data[current][0]
            t_p = self.rev_data[prev][0]

          # break from the for-loop
          break
      else:
        # finished the for-loop (no resyncing was performed)
        return

  def set_revision_info(self, revision, log, text):
    """Build the CVSRevision for REVISION (with log message LOG and
    deltatext TEXT), write it to the revs file, and record its
    metadata.  A resync line is also written if the revision's
    timestamp was adjusted by tree_completed()."""
    timestamp, author, old_ts = self.rev_data[revision]
    digest = sha.new(log + '\0' + author).hexdigest()
    if old_ts:
      # the timestamp on this revision was changed. log it for later
      # resynchronization of other files's revisions that occurred
      # for this time and log message.
      self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

    # "...Give back one kadam to honor the Hebrew God whose Ark this is."
    #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
    #
    # If revision 1.1 appears to have been created via 'cvs add'
    # instead of 'cvs import', then this file probably never had a
    # default branch, so retroactively remove its record in the
    # default branches db.  The test is that the log message CVS uses
    # for 1.1 in imports is "Initial revision\n" with no period.
    if revision == '1.1' and log != 'Initial revision\n':
      # default_branches_db is a Database object, so its has_key()
      # API is used as-is.
      if self.default_branches_db.has_key(self.rel_name):
        del self.default_branches_db[self.rel_name]

    # Get the previous revision (None if there is none recorded) and
    # its timestamp.  The same looked-up value is used everywhere
    # below, so a revision without a recorded predecessor is handled
    # uniformly instead of raising KeyError part-way through.
    prev_rev = self.prev_rev.get(revision, None)
    prev_timestamp = self.rev_data.get(prev_rev, [0, None, None])[0]

    # How to tell if a CVSRevision is an add, a change, or a deletion:
    #
    # It's a delete if RCS state is 'dead'
    #
    # It's an add if RCS state is 'Exp.' and
    #      - we either have no previous revision
    #        or
    #      - we have a previous revision whose state is 'dead'
    #
    # Anything else is a change.
    if self.rev_state[revision] == 'dead':
      op = OP_DELETE
    elif prev_rev is None or self.rev_state[prev_rev] == 'dead':
      op = OP_ADD
    else:
      op = OP_CHANGE

    if text:
      deltatext_code = DELTATEXT_NONEMPTY
    else:
      deltatext_code = DELTATEXT_EMPTY

    c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp, op,
                        prev_rev, revision,
                        self.next_rev.get(revision),
                        self.file_in_attic, self.file_executable,
                        self.file_size,
                        deltatext_code, self.fname,
                        self.mode, self.rev_to_branch_name(revision),
                        self.taglist.get(revision, []),
                        self.branchlist.get(revision, []))
    self.revs.write(str(c_rev) + "\n")
    StatsKeeper().record_c_rev(c_rev)

    # metadata_db is a Database object, so its has_key() API is used
    # as-is.
    if not self.metadata_db.has_key(digest):
      self.metadata_db[digest] = (author, log)

  def parse_completed(self):
    """Register the current file's symbols as blockers of their parent
    branches, and count the file as processed."""
    # Walk through all branches and tags and register them with
    # their parent branch in the symbol database.
    for symbol_map in (self.taglist, self.branchlist):
      for revision, symbols in symbol_map.items():
        # The parent branch depends only on REVISION, so look it up
        # once per revision; it is None for trunk revisions.
        name = self.rev_to_branch_name(revision)
        if name is None:
          continue
        for symbol in symbols:
          self.symbol_db.register_branch_blocker(name, symbol)

    self.num_files = self.num_files + 1

  def write_symbol_db(self):
    """Write the collected symbol database to disk."""
    self.symbol_db.write()
class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and
  closings.

  This data will later be used to determine valid SVNRevision ranges
  from which a file can be copied when creating a branch or tag in
  Subversion.  Do this by finding "Openings" and "Closings" for each
  file copied onto a branch or tag.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  obviously sprouts from revision 1.2.  Therefore, 1.2 is the opening
  for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
  'foo.c'.  Note that there may be many revisions chronologically
  between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
  perhaps even including on branch BEE itself.  But 1.3 is the next
  revision *on the same line* as 1.2, that is why it is the closing
  revision for those symbolic names of which 1.2 is the opening.

  The reason for doing all this hullabaloo is to make branch and tag
  creation as efficient as possible by minimizing the number of copies
  and deletes per creation.  For example, revisions 1.2 and 1.3 of
  foo.c might correspond to revisions 17 and 30 in Subversion.  That
  means that when creating branch BEE, there is some motivation to do
  the copy from one of 17-30.  Now if there were another file,
  'bar.c', whose opening and closing CVSRevisions for BEE corresponded
  to revisions 24 and 39 in Subversion, we would know that the ideal
  thing would be to copy the branch from somewhere between 24 and 29,
  inclusive.
  """
  def __init__(self):
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
    self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
    Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

    # The keys of this dictionary are Subversion repository *source*
    # paths for which we've encountered an 'opening'.  The values are
    # lists of the symbolic names that this path has opened.  The only
    # paths that should be in this dict are paths whose corresponding
    # CVSRevision is a default branch revision.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log any openings found in C_REV, and if C_REV.next_rev is not
    None, a closing.  The opening uses SVN_REVNUM, but the closing (if
    any) will have its revnum determined later."""
    for name in c_rev.tags + c_rev.branches:
      name = _clean_symbolic_name(name)
      self._note_default_branch_opening(c_rev, name)
      if c_rev.op != OP_DELETE:
        self._log(name, svn_revnum, c_rev.svn_path, OPENING)

      # If our c_rev has a next_rev, then that's the closing rev for
      # this source revision.  Log it to closings for later processing
      # since we don't know the svn_revnum yet.
      if c_rev.next_rev is not None:
        self.closings.write('%s %s\n' %
                            (name, c_rev.unique_key(c_rev.next_rev)))

  def _log(self, name, svn_revnum, svn_path, type):
    """Write out a single line to the symbol_openings_closings file
    representing that svn_revnum of svn_path is either the opening or
    closing (TYPE) of NAME (a symbolic name).

    TYPE should only be one of the following global constants:
    OPENING or CLOSING."""
    # 8 places gives us 99,999,999 SVN revs.  That *should* be enough.
    self.symbolings.write('%s %.8d %s %s\n' % (name, svn_revnum,
                                               type, svn_path))

  def close(self):
    """Iterate through the closings file, lookup the svn_revnum for
    each closing CVSRevision, and write a proper line out to the
    symbolings file."""
    # Use this to get the c_rev.svn_path of our rev_key
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    # The closings file must be closed (flushed) before it is re-read.
    self.closings.close()
    for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      (name, rev_key) = line.rstrip().split(" ", 1)
      svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

      c_rev = cvs_revs_db.get_revision(rev_key)
      self._log(name, svn_revnum, c_rev.svn_path, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """Record SYMBOLIC_NAME as having been opened on
    C_REV.svn_trunk_path.

    NOTE(review): this is intended for default branch revisions, but
    no such check is made here -- every call records the name.
    Confirm whether callers are expected to filter."""
    path = c_rev.svn_trunk_path
    if path not in self.open_paths_with_default_branches:
      self.open_paths_with_default_branches[path] = [ ]
    self.open_paths_with_default_branches[path].append(symbolic_name)

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.svn_trunk_path, then log each name in
    self.open_paths_with_default_branches[C_REV.svn_trunk_path] as a
    closing with SVN_REVNUM as the closing revision number. """
    path = c_rev.svn_trunk_path
    if path in self.open_paths_with_default_branches:
      # log each symbol as a closing
      for name in self.open_paths_with_default_branches[path]:
        self._log(name, svn_revnum, path, CLOSING)
      # Remove them from the openings list as we're done with them.
      del self.open_paths_with_default_branches[path]
1598 class PersistenceManager:
1599 """The PersistenceManager allows us to effectively store SVNCommits
1600 to disk and retrieve them later using only their subversion revision
1601 number as the key. It also returns the subversion revision number
1602 for a given CVSRevision's unique key.
1604 All information pertinent to each SVNCommit is stored in a series of
1605 on-disk databases so that SVNCommits can be retrieved on-demand.
1607 MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
1608 In 'new' mode, PersistenceManager will initialize a new set of on-disk
1609 databases and be fully-featured.
1610 In 'read' mode, PersistenceManager will open existing on-disk databases
1611 and the set_* methods will be unavailable."""
def __init__(self, mode):
  """Open the on-disk databases in MODE, which must be one of the
  constants DB_OPEN_NEW or DB_OPEN_READ; raise RuntimeError for any
  other value.

  The metadata database and the CVSRevision database are always
  opened read-only, whatever MODE is."""
  # Validate before touching any state or opening any database.
  if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
    raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
  self.mode = mode
  self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
  Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
  self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
  Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
  self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES), mode)
  Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)
  self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
  self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
  ###PERF kff Elsewhere there are comments about sucking the tags db
  ### into memory.  That seems like a good idea.
  # NOTE(review): the tags db and the motivating-revnums db are both
  # treated as needed only for non-trunk-only conversions, matching
  # their use after the trunk_only early return in get_svn_commit;
  # confirm the extent of this guard against the pristine file.
  if not Ctx().trunk_only:
    self.tags_db = TagsDatabase(DB_OPEN_READ)
    self.motivating_revnums = Database(temp(MOTIVATING_REVNUMS), mode)
    Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)

  # "branch_name" -> svn_revnum in which branch was last filled.
  # This is used by CVSCommit._pre_commit, to prevent creating a fill
  # revision which would have nothing to do.
  self.last_filled = {}
1636 def get_svn_revnum(self, cvs_rev_unique_key):
1637 """Return the Subversion revision number in which
1638 CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
1639 is no mapping for CVS_REV_UNIQUE_KEY."""
1640 return int(self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM))
1642 def get_svn_commit(self, svn_revnum):
1643 """Return an SVNCommit that corresponds to SVN_REVNUM.
1645 If no SVNCommit exists for revnum SVN_REVNUM, then return None.
1647 This method can throw SVNCommitInternalInconsistencyError.
1649 svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
1650 c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
1651 if c_rev_keys == None:
1652 return None
1654 digest = None
1655 for key in c_rev_keys:
1656 c_rev = self.cvs_revisions.get_revision(key)
1657 svn_commit.add_revision(c_rev)
1658 # Set the author and log message for this commit by using
1659 # CVSRevision metadata, but only if haven't done so already.
1660 if digest is None:
1661 digest = c_rev.digest
1662 author, log_msg = self.svn_commit_metadata[digest]
1663 svn_commit.set_author(author)
1664 svn_commit.set_log_msg(log_msg)
1666 # If we're doing a trunk-only conversion, we don't need to do any more work.
1667 if Ctx().trunk_only:
1668 return svn_commit
1670 name, date = self._get_name_and_date(svn_revnum)
1671 if name:
1672 svn_commit.set_symbolic_name(name)
1673 svn_commit.set_date(date)
1674 if self.tags_db.has_key(name):
1675 svn_commit.is_tag = 1
1677 motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
1678 if motivating_revnum:
1679 svn_commit.set_motivating_revnum(int(motivating_revnum))
1680 svn_commit.set_date(date)
1682 if len(svn_commit.cvs_revs) and name:
1683 msg = """An SVNCommit cannot have cvs_revisions *and* a
1684 corresponding symbolic name ('%s') to fill.""" % name
1685 raise SVNCommit.SVNCommitInternalInconsistencyError(msg)
1687 return svn_commit
1689 def set_cvs_revs(self, svn_revnum, cvs_revs):
1690 """Record the bidirectional mapping between SVN_REVNUM and
1691 CVS_REVS."""
1692 if self.mode == DB_OPEN_READ:
1693 raise RuntimeError, \
1694 'Write operation attempted on read-only PersistenceManager'
1695 for c_rev in cvs_revs:
1696 Log().write(LOG_VERBOSE, " ", c_rev.unique_key())
1697 self.svn2cvs_db[str(svn_revnum)] = [x.unique_key() for x in cvs_revs]
1698 for c_rev in cvs_revs:
1699 self.cvs2svn_db[c_rev.unique_key()] = svn_revnum
1701 def set_name_and_date(self, svn_revnum, name, date):
1702 """Associate symbolic name NAME and DATE with SVN_REVNUM."""
1703 if self.mode == DB_OPEN_READ:
1704 raise RuntimeError, \
1705 'Write operation attempted on read-only PersistenceManager'
1706 self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
1707 self.last_filled[name] = svn_revnum
1709 def _get_name_and_date(self, svn_revnum):
1710 """Return a tuple containing the symbolic name and date associated
1711 with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
1712 associated with it."""
1713 return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))
1715 def set_motivating_revnum(self, svn_revnum, motivating_revnum):
1716 """Store MOTIVATING_REVNUM as the value of SVN_REVNUM"""
1717 if self.mode == DB_OPEN_READ:
1718 raise RuntimeError, \
1719 'Write operation attempted on read-only PersistenceManager'
1720 self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
class CVSCommit:
  """A group of CVSRevisions that will become one or more Subversion
  commits.

  Revisions are collected with add_revision(); once the group is
  complete, process_revisions() generates the SVNCommit (or SVNCommits)
  for it."""

  def __init__(self, digest, author, log):
    """Create an empty grouping identified by DIGEST, with the given
    AUTHOR and LOG message."""
    self.digest = digest
    self.author = author
    self.log = log

    # Symbolic names for which the last source revision has already
    # been seen and for which the CVSRevisionAggregator has already
    # generated a fill SVNCommit.  See self.process_revisions().
    self.done_symbols = [ ]

    # Names of the files touched by this grouping (used as a set).
    self.files = { }

    # The CVSRevisions themselves, split by operation.
    self.changes = [ ]
    self.deletes = [ ]

    # t_min starts above any incoming timestamp and t_max below any,
    # so the first revision added snaps both to its own time without
    # special-casing, and later ones only widen the range.  long()
    # keeps the sentinel a long integer, exactly like a 1L literal.
    self.t_min = long(1) << 32
    self.t_max = 0

    # This will be set to the SVNCommit that occurs in self._commit.
    self.motivating_commit = None

    # All non-primary commits motivated by the main commit, gathered
    # so that their dates can be set to the primary commit's date.
    self.secondary_commits = [ ]

    # State for handling default branches.
    #
    # Commits made on a non-trunk default branch are visible as HEAD
    # in CVS, so they must also be copied to (or deleted from) trunk
    # in Subversion.  It is tempting to do that copy in the same
    # revision whenever it would not overwrite a trunk path changed by
    # this very commit, and to use a separate revision only otherwise.
    # Unfortunately, Subversion doesn't support copies with sources in
    # the current txn; all copies must be based in committed
    # revisions.  Therefore the separate, generated revision is
    # created unconditionally.
    #
    # This list receives a c_rev for each default branch commit that
    # will need to be copied to trunk (or deleted from trunk) in that
    # generated revision following the "regular" revision.
    self.default_branch_cvs_revisions = [ ]

  def __cmp__(self, other):
    """Order commits by t_max, then by t_min, and lastly by digest."""
    order = cmp(self.t_max, other.t_max)
    if not order:
      order = cmp(self.t_min, other.t_min)
    if not order:
      order = cmp(self.digest, other.digest)
    return order

  def has_file(self, fname):
    """Return true if a revision of FNAME has been added to this
    grouping."""
    return self.files.has_key(fname)

  def revisions(self):
    """Return a new list of all CVSRevisions in this grouping: the
    changes followed by the deletes."""
    return self.changes + self.deletes

  def opens_symbolic_name(self, name):
    """Return 1 if any CVSRevision in this commit reports (through its
    own opens_symbolic_name()) that it opens NAME, else return 0."""
    for revision in self.revisions():
      if revision.opens_symbolic_name(name):
        return 1
    return 0

  def add_revision(self, c_rev):
    """Add C_REV to this grouping, widening the grouping's time range
    to include C_REV's timestamp."""
    # ### ISSUE: It's possible, though unlikely, that the time range
    # of a commit could get gradually expanded to be arbitrarily
    # longer than COMMIT_THRESHOLD.  I'm not sure this is a huge
    # problem, and anyway deciding where to break it up would be a
    # judgement call.  For now, we just print a warning in
    # process_revisions() if this happens.
    stamp = c_rev.timestamp
    if stamp < self.t_min:
      self.t_min = stamp
    if stamp > self.t_max:
      self.t_max = stamp

    # Anything that is not a delete is an OP_CHANGE or OP_ADD.
    if c_rev.op == OP_DELETE:
      self.deletes.append(c_rev)
    else:
      self.changes.append(c_rev)

    self.files[c_rev.fname] = 1

  def _pre_commit(self):
    """Generates any SVNCommits that must exist before the main
    commit."""

    # Several c_revs in this commit may each call for branch B to be
    # filled, and several branches may be committed on at once; either
    # way each branch gets exactly one fill per CVSCommit.  This list
    # records the branches already given theirs.
    branches_counted = [ ]

    def needs_fill(c_rev, pm):
      """Return 1 if this is the first commit on a new branch (for
      this file) and we need to fill the branch; else return 0
      (meaning that some other file's first commit on the branch has
      already done the fill for us).

      If C_REV.op is OP_ADD, only return 1 if the branch that this
      commit is on has no last filled revision.

      PM is a PersistenceManager to query.
      """
      # Equal '.' counts mean c_rev is still on its predecessor's line
      # of development, so there is nothing to fill.
      if c_rev.rev.count('.') == c_rev.prev_rev.count('.'):
        return 0

      prev_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
      if c_rev.op == OP_ADD:
        # A file added on the branch (so dead on trunk) only warrants
        # a fill if the branch has never been filled before.
        if pm.last_filled.get(c_rev.branch_name, None) is None:
          return 1
      elif prev_revnum > pm.last_filled.get(c_rev.branch_name, 0):
        return 1
      return 0

    for c_rev in self.changes + self.deletes:
      # A commit on a branch requires the branch path to exist in HEAD
      # of the Subversion repository; if it doesn't, the branch must
      # be filled first so that the path exists afterwards.
      branch = c_rev.branch_name
      if not branch:
        continue
      if branch in branches_counted or branch in self.done_symbols:
        continue
      if not needs_fill(c_rev, Ctx()._persistence_manager):
        continue

      fill_commit = SVNCommit("pre-commit symbolic name '%s'" % branch)
      fill_commit.set_symbolic_name(branch)
      self.secondary_commits.append(fill_commit)
      branches_counted.append(branch)

  def _commit(self):
    """Generates the primary SVNCommit that corresponds to this
    CVSCommit."""
    # The SVNCommit is generated unconditionally.  Even if the only
    # change here is a deletion of an already-deleted file (a 'dead'
    # revision whose predecessor was also 'dead'), a Subversion
    # revision carrying the second dead revision's log message is
    # still produced, because we don't want to lose that information.
    primary = SVNCommit("commit")
    self.motivating_commit = primary

    for c_rev in self.changes:
      primary.add_revision(c_rev)
      # A 1.1.1.1 with an empty deltatext is almost always an imported
      # file whose 1.1 and 1.1.1.1 are identical: CVS stores the
      # content in 1.1 and an empty delta in 1.1.1.1.  There's no
      # reason to reflect that non-change on trunk, so such revisions
      # are never queued for the default branch copy.  (If we were
      # really paranoid, we could make sure 1.1's log message is the
      # CVS-generated "Initial revision\n", but the conditions below
      # seem strict enough.)
      if (c_rev.deltatext_code != DELTATEXT_EMPTY
          or c_rev.rev != "1.1.1.1"):
        if c_rev.is_default_branch_revision():
          self.default_branch_cvs_revisions.append(c_rev)

    for c_rev in self.deletes:
      # When a file is added on a branch, CVS not only adds the file
      # on the branch, but generates a trunk revision (typically 1.1)
      # for that file in state 'dead'.  That revision is only kept if
      # its log message is not the standard cvs fabricated one.
      if c_rev.prev_rev is None:
        # c_rev.branches may be empty if the originating branch has
        # been excluded.
        if not c_rev.branches:
          continue
        fabricated_msg = ('file %s was initially added on branch %s.\n'
                          % (c_rev.filename(), c_rev.branches[0]))
        author, log_msg = \
            Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
        if log_msg == fabricated_msg:
          continue

      primary.add_revision(c_rev)
      if c_rev.is_default_branch_revision():
        self.default_branch_cvs_revisions.append(c_rev)

    # The loop over self.deletes may have skipped everything, leaving
    # the SVNCommit without CVSRevisions.  In that case it is not
    # flushed to disk and the revision number it took is given back.
    if len(primary.cvs_revs) > 0:
      primary.flush()
    else:
      SVNCommit.revnum = SVNCommit.revnum - 1

    if not Ctx().trunk_only:
      for c_rev in self.revisions():
        Ctx()._symbolings_logger.log_revision(c_rev, primary.revnum)

  def _post_commit(self):
    """Generates any SVNCommits that we can perform now that _commit
    has happened.  That is, handle non-trunk default branches.
    Sometimes an RCS file has a non-trunk default branch, so a commit
    on that default branch would be visible in a default CVS checkout
    of HEAD.  If we don't copy that commit over to Subversion's trunk,
    then there will be no Subversion tree which corresponds to that
    CVS checkout.  Of course, in order to copy the path over, we may
    first need to delete the existing trunk there."""
    # Nothing to synchronize unless there were default branch revs.
    if not len(self.default_branch_cvs_revisions):
      return

    # One SVNCommit covers all of our default branch c_revs.
    sync_commit = SVNCommit("post-commit default branch(es)")
    sync_commit.set_motivating_revnum(self.motivating_commit.revnum)
    for c_rev in self.default_branch_cvs_revisions:
      sync_commit.add_revision(c_rev)
      Ctx()._symbolings_logger.log_default_branch_closing(
        c_rev, sync_commit.revnum)
    self.secondary_commits.append(sync_commit)

  def process_revisions(self, done_symbols):
    """Process all the CVSRevisions that this instance has, creating
    one or more SVNCommits in the process.  Generate fill SVNCommits
    only for symbols not in DONE_SYMBOLS (avoids unnecessary
    fills).

    Return the primary SVNCommit that corresponds to this CVSCommit.
    The returned SVNCommit is the commit that motivated any other
    SVNCommits generated in this CVSCommit."""
    self.done_symbols = done_symbols
    seconds = self.t_max - self.t_min + 1

    Log().write(LOG_VERBOSE, '-' * 60)
    Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
    if seconds == 1:
      Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
                  % time.ctime(self.t_max))
    else:
      Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
      Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
                  % (time.ctime(self.t_max), seconds))

    if seconds > COMMIT_THRESHOLD + 1:
      Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
                  % (warning_prefix, COMMIT_THRESHOLD))

    if not Ctx().trunk_only:
      self._pre_commit()
      self._commit()
      self._post_commit()

      # Secondary commits take the primary commit's date.
      for secondary in self.secondary_commits:
        secondary.set_date(self.motivating_commit.get_date())
        secondary.flush()
    else:
      # Only do the primary commit if we're trunk-only.
      self._commit()

    return self.motivating_commit
class SVNCommit:
  """This represents one commit to the Subversion Repository.  There
  are three types of SVNCommits:

  1. Commits one or more CVSRevisions (cannot fill a symbolic name).

  2. Creates or fills a symbolic name (cannot commit CVSRevisions).

  3. Updates trunk to reflect the contents of a particular branch
     (this is to handle RCS default branches)."""

  # The revision number to assign to the next new SVNCommit.
  # We start at 2 because SVNRepositoryMirror uses the first commit
  # to create trunk, tags, and branches.
  revnum = 2

  class SVNCommitInternalInconsistencyError(Exception):
    """Exception raised if we encounter an impossible state in the
    SVNCommit Databases."""
    pass

  def __init__(self, description="", revnum=None, cvs_revs=None):
    """Instantiate an SVNCommit.  DESCRIPTION is for debugging only.
    If REVNUM, the SVNCommit will correspond to that revision number;
    and if CVS_REVS, then they must be the exact set of CVSRevisions for
    REVNUM.

    It is an error to pass CVS_REVS without REVNUM, but you may pass
    REVNUM without CVS_REVS, and then add a revision at a time by
    invoking add_revision()."""
    self._description = description

    # Revprop metadata for this commit.
    #
    # These initial values are placeholders.  At least the log and the
    # date should be different by the time these are used.
    #
    # They are private because their values should be returned encoded
    # in UTF8, but callers aren't required to set them in UTF8.
    # Therefore, accessor methods are used to set them, and
    # self.get_revprops() is used to get them, in dictionary form.
    self._author = Ctx().username
    self._log_msg = "This log message means an SVNCommit was used too soon."
    self._max_date = 0 # Latest date seen so far.

    self.cvs_revs = cvs_revs or []
    if revnum:
      self.revnum = revnum
    else:
      # No number given: take the next one from the class-wide counter.
      self.revnum = SVNCommit.revnum
      SVNCommit.revnum = SVNCommit.revnum + 1

    # The symbolic name that is filled in this SVNCommit, if any
    self.symbolic_name = None

    # If this commit is a default branch synchronization, this
    # variable represents the subversion revision number of the
    # *primary* commit where the default branch changes actually
    # happened.  It is None otherwise.
    #
    # It is possible for multiple synchronization commits to refer to
    # the same motivating commit revision number, and it is possible
    # for a single synchronization commit to contain CVSRevisions on
    # multiple different default branches.
    self.motivating_revnum = None

    # is_tag is true only if this commit is a fill of a symbolic name
    # that is a tag, None in all other cases.
    self.is_tag = None

  def set_symbolic_name(self, name):
    "Set self.symbolic_name to NAME, after cleaning it."
    name = _clean_symbolic_name(name)
    self.symbolic_name = name

  def set_motivating_revnum(self, revnum):
    "Set self.motivating_revnum to REVNUM."
    self.motivating_revnum = revnum

  def set_author(self, author):
    """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
    This is the only way to set an SVNCommit's author."""
    self._author = author

  def set_log_msg(self, msg):
    """Set this SVNCommit's log message to MSG (a locally-encoded string).
    This is the only way to set an SVNCommit's log message."""
    self._log_msg = msg

  def set_date(self, date):
    """Set this SVNCommit's date to DATE (an integer).
    Note that self.add_revision() updates this automatically based on
    a CVSRevision; so you may not need to call this at all, and even
    if you do, the value may be overwritten by a later call to
    self.add_revision()."""
    self._max_date = date

  def get_date(self):
    """Returns this SVNCommit's date as an integer."""
    return self._max_date

  def get_revprops(self):
    """Return the Subversion revprops for this SVNCommit, as a
    dictionary with the keys 'svn:author', 'svn:log' and 'svn:date'.

    Author and log message are converted from Ctx().encoding to UTF8.
    If that fails with a UnicodeError, a warning is logged and the
    unconverted values are returned instead."""
    date = format_date(self._max_date)
    try:
      ### FIXME: The 'replace' behavior should be an option, like
      ### --encoding is.
      utf8_author = None
      if self._author is not None:
        unicode_author = unicode(self._author, Ctx().encoding, 'replace')
        utf8_author = unicode_author.encode('utf8')
      unicode_log = unicode(self.get_log_msg(), Ctx().encoding, 'replace')
      utf8_log = unicode_log.encode('utf8')
      return { 'svn:author' : utf8_author,
               'svn:log'    : utf8_log,
               'svn:date'   : date }
    except UnicodeError:
      Log().write(LOG_WARN, '%s: problem encoding author or log message:'
                  % warning_prefix)
      Log().write(LOG_WARN, " author: '%s'" % self._author)
      Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
      Log().write(LOG_WARN, " date: '%s'" % date)
      Log().write(LOG_WARN, "(subversion rev %s) Related files:" % self.revnum)
      for c_rev in self.cvs_revs:
        Log().write(LOG_WARN, " ", c_rev.fname)

      Log().write(LOG_WARN, "Consider rerunning with (for example)",
                  "'--encoding=latin1'.\n")
      # It's better to fall back to the original (unknown encoding) data
      # than to either 1) quit or 2) record nothing at all.
      return { 'svn:author' : self._author,
               'svn:log'    : self.get_log_msg(),
               'svn:date'   : date }

  def add_revision(self, cvs_rev):
    """Append CVS_REV to self.cvs_revs, and move this commit's date
    forward to CVS_REV's timestamp if that is later."""
    self.cvs_revs.append(cvs_rev)
    if cvs_rev.timestamp > self._max_date:
      self._max_date = cvs_rev.timestamp

  def _is_primary_commit(self):
    """Return true if this is a primary SVNCommit, false otherwise."""
    return not (self.symbolic_name or self.motivating_revnum)

  def flush(self):
    """Record this SVNCommit with the persistence manager: always its
    CVSRevisions, plus its motivating revnum if it has one, plus its
    symbolic name and date if it is not a primary commit."""
    Log().write(LOG_NORMAL, "Creating Subversion commit %d (%s)"
                % (self.revnum, self._description))
    Ctx()._persistence_manager.set_cvs_revs(self.revnum, self.cvs_revs)

    if self.motivating_revnum is not None:
      Ctx()._persistence_manager.set_motivating_revnum(self.revnum,
                                                       self.motivating_revnum)

    # If we're not a primary commit, then store our date and/or our
    # symbolic_name
    if not self._is_primary_commit():
      Ctx()._persistence_manager.set_name_and_date(self.revnum,
                                                   self.symbolic_name,
                                                   self._max_date)

  def __str__(self):
    """Return a human-readable description of this SVNCommit.  This
    description is not intended to be machine-parseable (although
    we're not going to stop you if you try!)"""
    # Collect the lines and join once, rather than growing a string.
    lines = ["SVNCommit #: " + str(self.revnum)]
    if self.symbolic_name:
      lines.append(" symbolic name: " + self.symbolic_name)
    else:
      lines.append(" NO symbolic name")
    lines.append(" debug description: " + self._description)
    lines.append(" cvs_revs:")
    for c_rev in self.cvs_revs:
      lines.append(" " + c_rev.unique_key())
    # Every line, including the last, is newline-terminated.
    return "\n".join(lines) + "\n"

  def get_log_msg(self):
    """Returns the actual log message for a primary commit, and the
    appropriate manufactured log message for a secondary commit."""
    if self.symbolic_name is not None:
      return self._log_msg_for_symbolic_name_commit()
    elif self.motivating_revnum is not None:
      return self._log_msg_for_default_branch_commit()
    else:
      return self._log_msg

  def _log_msg_for_symbolic_name_commit(self):
    """Creates a log message for a manufactured commit that fills
    self.symbolic_name.  If self.is_tag is true, write the log message
    as though for a tag, else write it as though for a branch."""
    if self.is_tag:
      kind = 'tag'
    else:
      kind = 'branch'

    # Names of 13 or more characters go on a line of their own.
    # In Python 2.2.3, we could use textwrap.fill().  Oh well :-).
    if len(self.symbolic_name) >= 13:
      separator = '\n'
    else:
      separator = ' '

    return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
           % (kind, separator, self.symbolic_name)

  def _log_msg_for_default_branch_commit(self):
    """Creates a log message for a manufactured commit that
    synchronizes a non-trunk default branch with trunk."""
    msg = 'This commit was generated by cvs2svn to compensate for '     \
          'changes in r%d,\n'                                           \
          'which included commits to RCS files with non-trunk default ' \
          'branches.\n' % self.motivating_revnum
    return msg
class CVSRevisionAggregator:
  """This class groups CVSRevisions into CVSCommits that represent
  at least one SVNCommit."""

  def __init__(self):
    """Open the databases the aggregator reads, and install a fresh
    SymbolingsLogger, PersistenceManager and default branches database
    on Ctx()."""
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
                                   DB_OPEN_READ)

    # Pending CVSCommits, keyed by digest (or by a digest with '-'
    # appended once a commit has been closed to further changes).
    self.cvs_commits = {}

    # Symbols (used as a set) waiting for their closing SVNCommit.
    self.pending_symbols = {}

    # A list of symbols for which we've already encountered the last
    # CVSRevision that is a source for that symbol.  That is, the
    # final fill for this symbol has been done, and we never need to
    # fill it again.
    self.done_symbols = [ ]

    # The most recently created primary svn_commit object, kept merely
    # for its date, which is given to the SVNCommits created in
    # self.attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = Database(temp(DEFAULT_BRANCHES_DB),
                                          DB_OPEN_READ)

  def process_revision(self, c_rev):
    """Add C_REV to the pending CVSCommit for its digest, after first
    retiring every pending commit that C_REV is too late to join, and
    then process the retired commits in time order."""
    # Each time we read a new line, we scan the commits we've
    # accumulated so far to see if any are ready for processing now.
    ready_queue = [ ]
    for digest_key, pending in self.cvs_commits.items():
      if pending.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
        ready_queue.append(pending)
        del self.cvs_commits[digest_key]
      elif pending.has_file(c_rev.fname):
        # The inbound revision is on the same file as a pending
        # commit: close that commit to further changes by re-keying
        # it.  Don't flush it though, as there may be other pending
        # commits dated before this one.
        # ### ISSUE: the has_file() check is not optimal.  It does
        # fix the dataloss bug where revisions would get lost if
        # checked in too quickly, but it can also break apart the
        # commits.  The correct fix would require tracking the
        # dependencies between change sets and committing them in
        # proper order.
        closed_key = digest_key + '-'
        # Keep extending until the key is not already in use.
        while self.cvs_commits.has_key(closed_key):
          closed_key = closed_key + '-'
        self.cvs_commits[closed_key] = pending
        del self.cvs_commits[digest_key]

    # Add this item into the set of still-available commits, creating
    # the commit for its digest if there is none.
    if not self.cvs_commits.has_key(c_rev.digest):
      author, log = self.metadata_db[c_rev.digest]
      self.cvs_commits[c_rev.digest] = CVSCommit(c_rev.digest, author, log)
    self.cvs_commits[c_rev.digest].add_revision(c_rev)

    # Anything in the ready_queue needs to be processed now, because
    # this latest rev couldn't possibly be part of any of it.  Sort
    # into time-order first.
    ready_queue.sort()

    # Make sure we attempt_to_commit_symbols for this c_rev, even if no
    # commits are ready.
    if not ready_queue:
      self.attempt_to_commit_symbols(ready_queue, c_rev)

    # Iterate over a copy, since the queue shrinks as we go.
    for ready in ready_queue[:]:
      self.latest_primary_svn_commit = \
          ready.process_revisions(self.done_symbols)
      ready_queue.remove(ready)
      self.attempt_to_commit_symbols(ready_queue, c_rev)

  def flush(self):
    """Commit anything left in self.cvs_commits.  Then inform the
    SymbolingsLogger that all commits are done."""
    # Pair each commit with its key, commit first so that sorting
    # orders by commit.
    ready_queue = [(commit, key) for key, commit in self.cvs_commits.items()]
    ready_queue.sort()

    for entry in ready_queue[:]:
      commit, key = entry
      self.latest_primary_svn_commit = \
          commit.process_revisions(self.done_symbols)
      ready_queue.remove(entry)
      del self.cvs_commits[key]
      self.attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
    """Generate 1 SVNCommit for each symbol in self.pending_symbols
    that doesn't have an opening CVSRevision in either QUEUED_COMMITS
    or self.cvs_commits.values().

    If C_REV is not None, then we first add to self.pending_symbols
    any symbols from C_REV that C_REV is the last CVSRevision for.
    """
    # If we're not doing a trunk-only conversion, get the symbolic
    # names that this c_rev is the last *source* CVSRevision for and
    # add them to those left over from previous passes through the
    # aggregator.
    if c_rev and not Ctx().trunk_only:
      for sym in self.last_revs_db.get(c_rev.unique_key(), []):
        self.pending_symbols[sym] = None

    # Collect the symbols (as a set) that still have *source*
    # CVSRevisions in a commit that has not been processed yet.
    unprocessed = self.cvs_commits.values() + queued_commits
    still_open = {}
    for sym in self.pending_symbols.keys():
      for cvs_commit in unprocessed:
        if cvs_commit.opens_symbolic_name(sym):
          still_open[sym] = None
          break

    # Sort the pending symbols so that we will always process the
    # symbols in the same order, regardless of the order in which the
    # dict hashing algorithm hands them back to us.  We do this so
    # that our tests will get the same results on all platforms.
    ordered_symbols = self.pending_symbols.keys()
    ordered_symbols.sort()
    for sym in ordered_symbols:
      if not still_open.has_key(sym):
        # Nothing will open sym any more, so close it.
        closing = SVNCommit("closing tag/branch '%s'" % sym)
        closing.set_symbolic_name(sym)
        closing.set_date(self.latest_primary_svn_commit.get_date())
        closing.flush()
        self.done_symbols.append(sym)
        del self.pending_symbols[sym]
class SymbolingsReader:
  """Provides an interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file
  and the SYMBOL_OFFSETS_DB.  Does the heavy lifting of finding and
  returning the correct opening and closing Subversion revision
  numbers for a given symbolic name."""

  def __init__(self):
    """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and
    reads the offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    # The offsets_db is really small, and we need to read and write
    # from it a fair bit, so suck it into memory
    on_disk_offsets = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    self.offsets = { }
    for symbol in on_disk_offsets.db.keys():
      self.offsets[symbol] = on_disk_offsets[symbol]

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Given SYMBOLIC_NAME and SVN_REVNUM, return a new
    SymbolicNameFillingGuide object.

    Note that if we encounter an opening rev in this fill, but the
    corresponding closing rev takes place later than SVN_REVNUM, the
    closing will not be passed to SymbolicNameFillingGuide in this
    fill (and will be discarded when encountered in a later fill).
    This is perfectly fine, because we can still do a valid fill
    without the closing--we always try to fill what we can as soon as
    we can."""
    guide = SymbolicNameFillingGuide(symbolic_name)

    # It's possible to have a branch start with a file that was added
    # on a branch; such a symbol has no offset, and its guide is
    # returned as it is.
    if not self.offsets.has_key(symbolic_name):
      return guide

    # Resume reading where the previous fill of this symbol stopped.
    self.symbolings.seek(self.offsets[symbolic_name])

    while 1:
      line_start = self.symbolings.tell()
      line = self.symbolings.readline().rstrip()
      if not line:
        break
      # Four space-separated fields; the path comes last and so may
      # itself contain spaces.
      name, revnum, entry_type, svn_path = line.split(" ", 3)
      revnum = int(revnum)
      # Stop at the first line that is too new or is for another symbol.
      if revnum > svn_revnum or name != symbolic_name:
        break
      guide.register(svn_path, revnum, entry_type)

    # If anything was registered, the next fill of this symbol starts
    # at the beginning of the last line read here.
    if not guide.is_empty():
      self.offsets[symbolic_name] = line_start

    guide.make_node_tree()
    return guide
class SymbolicNameFillingGuide:
  """A SymbolicNameFillingGuide is essentially a node tree
  representing the source paths to be copied to fill self.name in the
  current SVNCommit.

  After calling self.register() on a series of openings and closings,
  call self.make_node_tree() to prepare self.node_tree for
  examination.  See the docstring for self.make_node_tree() for
  details on the structure of self.node_tree.

  By walking self.node_tree and calling self.get_best_revnum() on each
  node, the caller can determine what subversion revision number to
  copy the path corresponding to that node from.  self.node_tree
  should be treated as read-only.

  The caller can then descend to sub-nodes to see if their "best
  revnum" differs from their parents' and if it does, take appropriate
  actions to "patch up" the subtrees."""

  def __init__(self, symbolic_name):
    """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
    prepares it for receiving openings and closings."""
    self.name = symbolic_name

    # Keys under which a leaf node stores its opening and closing
    # revision.  They start with '/' so that they can never collide
    # with a path component.
    self.opening_key = "/o"
    self.closing_key = "/c"

    # A dictionary of SVN_PATHS and SVN_REVNUMS whose format is:
    #
    # { svn_path : { self.opening_key : svn_revnum,
    #                self.closing_key : svn_revnum }
    #   ...}
    self.things = { }

    # The key for the root node of the node tree
    self.root_key = '0'
    # The dictionary that holds our node tree, seeded with the root key.
    self.node_tree = { self.root_key : { } }

  def get_best_revnum(self, node, preferred_revnum):
    """Determine the best subversion revision number to use when
    copying the source tree beginning at NODE.  Returns a tuple
    (REVNUM, SCORE) as computed by self._best_rev().

    PREFERRED_REVNUM is passed to self._best_rev and used to
    calculate the best revnum.

    If no revision can be found, an error is written to stderr and the
    program exits with status 1."""
    # Aggregate openings and closings from the rev tree
    openings = self._list_revnums_for_key(node, self.opening_key)
    closings = self._list_revnums_for_key(node, self.closing_key)

    # Score the lists
    scores = self._score_revisions(self._sum_revnum_counts(openings),
                                   self._sum_revnum_counts(closings))

    revnum, max_score = self._best_rev(scores, preferred_revnum)

    if revnum == SVN_INVALID_REVNUM:
      # Report the symbolic name being filled (self.name); a bare
      # 'name' is not defined in this scope.
      sys.stderr.write(error_prefix + ": failed to find a revision "
                       + "to copy from when copying %s\n" % self.name)
      sys.exit(1)
    return revnum, max_score

  def _best_rev(self, scores, preferred_rev):
    """Return a tuple (REV, MAX_SCORE) where REV is the revision with
    the highest score from SCORES, a list returned by
    _score_revisions(), and MAX_SCORE is that score.  When the maximum
    score is shared by multiple revisions, the oldest revision is
    selected, unless PREFERRED_REV is one of the possibilities, in
    which case, it is selected."""
    max_score = 0
    preferred_rev_score = -1
    rev = SVN_INVALID_REVNUM
    if preferred_rev is None:
      # Comparison order of different types is arbitrary.  Do not
      # expect None to compare less than int values below.
      # In Python 2.3 None compares with ints like negative infinity.
      # In Python 2.0 None compares with ints like positive infinity.
      preferred_rev = SVN_INVALID_REVNUM
    for revnum, count in scores:
      if count > max_score:
        max_score = count
        rev = revnum
      # The score in effect at PREFERRED_REV is that of the last entry
      # at or before it.
      if revnum <= preferred_rev:
        preferred_rev_score = count
    if preferred_rev_score == max_score:
      rev = preferred_rev
    return rev, max_score

  def _score_revisions(self, openings, closings):
    """Return a list of revisions and scores based on OPENINGS and
    CLOSINGS.  The returned list looks like:

       [(REV1, SCORE1), (REV2, SCORE2), ...]

    where REV2 > REV1.  OPENINGS and CLOSINGS are lists of
    (revnum, count) tuples as returned by _sum_revnum_counts(), or
    else None.

    Each score indicates that copying the corresponding revision (or
    any following revision up to the next revision in the list) of the
    object in question would yield that many correct paths at or
    underneath the object.  There may be other paths underneath it
    which are not correct and would need to be deleted or recopied;
    those can only be detected by descending and examining their
    scores.

    If OPENINGS is false, return the empty list."""
    # First look for easy outs.
    if not openings:
      return []

    # Must be able to iterate over closings below.
    if closings is None:
      closings = []

    # Every opening raises the running score from its revision onwards.
    scores = []
    opening_score_accum = 0
    for opening_rev, opening_score in openings:
      opening_score_accum = opening_score_accum + opening_score
      scores.append((opening_rev, opening_score_accum))

    # Every closing lowers the score from its revision onwards.  FIRST
    # is the index of the first entry a later closing could still
    # affect.
    first = 0
    for closing_rev, closing_score in closings:
      done_exact_rev = None
      insert_index = None
      insert_score = None
      for j in range(first, len(scores)):
        score_rev, score = scores[j]
        if score_rev >= closing_rev:
          if not done_exact_rev:
            if score_rev > closing_rev:
              # No entry exists for CLOSING_REV itself; remember where
              # one must be inserted once this scan is finished.
              # NOTE(review): if j is 0 here, scores[j-1] wraps around
              # to the last entry -- confirm that a closing can never
              # precede the first opening.
              insert_index = j
              insert_score = scores[j-1][1] - closing_score
            done_exact_rev = 1
          scores[j] = (score_rev, score - closing_score)
        else:
          first = j + 1
      if not done_exact_rev:
        # The closing is younger than every entry: add it at the end.
        scores.append((closing_rev, scores[-1][1] - closing_score))
      if insert_index is not None:
        scores.insert(insert_index, (closing_rev, insert_score))
    return scores

  def _sum_revnum_counts(self, rev_list):
    """Takes an array of revisions (REV_LIST), for example:

      [21, 18, 6, 49, 39, 24, 24, 24, 24, 24, 24, 24]

    and adds up every occurrence of each revision and returns a sorted
    array of tuples containing (svn_revnum, count):

      [(6, 1), (18, 1), (21, 1), (24, 7), (39, 1), (49, 1)]
    """
    counts = {}
    for revnum in rev_list:
      counts[revnum] = counts.get(revnum, 0) + 1
    summed = list(counts.items())
    summed.sort()
    return summed

  def _list_revnums_for_key(self, node, revnum_type_key):
    """Scan self.node_tree and return a list of all the revision
    numbers (including duplicates) contained in REVNUM_TYPE_KEY values
    for all leaf nodes at and under NODE.

    REVNUM_TYPE_KEY should be either self.opening_key or
    self.closing_key."""
    contents = self.node_tree[node]

    # If the node has self.opening_key, it must be a leaf node--all
    # leaf nodes have at least an opening key (although they may not
    # have a closing key).  Fetch revnum and return.
    if (contents.has_key(self.opening_key) and
        contents.has_key(revnum_type_key)):
      return [contents[revnum_type_key]]

    revnums = []
    for key, child in contents.items():
      # Keys starting with '/' are opening/closing flags, not children.
      if key[0] == '/':
        continue
      revnums.extend(self._list_revnums_for_key(child, revnum_type_key))
    return revnums

  def register(self, svn_path, svn_revnum, type):
    """Collects opening and closing revisions for this
    SymbolicNameFillingGuide.  SVN_PATH is the source path that needs
    to be copied into self.name, and SVN_REVNUM is either the first
    svn revision number that we can copy from (our opening), or the
    last (not inclusive) svn revision number that we can copy from
    (our closing).  TYPE indicates whether this path is an opening or
    a closing.

    The opening for a given SVN_PATH must be passed before the closing
    for it to have any effect... any closing encountered before a
    corresponding opening will be discarded.

    It is not necessary to pass a corresponding closing for every
    opening.
    """
    # Always log an OPENING
    if type == OPENING:
      self.things[svn_path] = {self.opening_key: svn_revnum}
    # Only log a closing if we've already registered the opening for
    # that path.
    elif type == CLOSING and self.things.has_key(svn_path):
      # When we have a non-trunk default branch, we may have multiple
      # closings--only register the first closing we encounter.
      if not self.things[svn_path].has_key(self.closing_key):
        self.things[svn_path][self.closing_key] = svn_revnum

  def make_node_tree(self):
    """Generates the SymbolicNameFillingGuide's node tree from
    self.things.  Each leaf node maps self.opening_key to the earliest
    subversion revision from which this node/path may be copied; and
    optionally map self.closing_key to the subversion revision one
    higher than the last revision from which this node/path may be
    copied.  Intermediate nodes never contain opening or closing
    flags."""
    for svn_path, open_close in self.things.items():
      parent_key = self.root_key

      # Walk down the path, one node at a time, creating the nodes
      # that do not exist yet.
      for component in svn_path.split('/'):
        if not self.node_tree[parent_key].has_key(component):
          child_key = gen_key()
          self.node_tree[child_key] = { }
          self.node_tree[parent_key][component] = child_key
        else:
          child_key = self.node_tree[parent_key][component]

        parent_key = child_key
      # Having reached the leaf, attach the value
      self.node_tree[parent_key] = open_close

  def is_empty(self):
    """Return true if we haven't accumulated any openings or closings,
    false otherwise."""
    return not len(self.things)
class FillSource:
  """One candidate source for the symbol filler in
  SVNRepositoryMirror: a source prefix and a key, plus the score and
  revision number assigned to it once it has been scored."""

  def __init__(self, prefix, key):
    """Build a FillSource for PREFIX and KEY that has not been scored
    yet (its score and revnum are both None)."""
    self.score = None
    self.revnum = None
    self.prefix = prefix
    self.key = key

  def set_score(self, score, revnum):
    """Record SCORE and REVNUM on this source."""
    self.revnum = revnum
    self.score = score

  def __cmp__(self, other):
    """Order FillSources so that sorting yields descending scores.
    Raises TypeError if either side has no score yet."""
    if other.score is None or self.score is None:
      raise TypeError('Tried to compare unscored FillSource')
    # The operands are swapped on purpose: higher scores sort first.
    return cmp(other.score, self.score)
2718 class SVNRepositoryMirror:
2719 """Mirror a Subversion Repository as it is constructed, one
2720 SVNCommit at a time. The mirror is skeletal; it does not contain
2721 file contents. The creation of a dumpfile or Subversion repository
2722 is handled by delegates. See self.add_delegate method for how to
2723 set delegates.
2725 The structure of the repository is kept in two databases and one
2726 hash. The revs_db database maps revisions to root node keys, and
2727 the nodes_db database maps node keys to nodes. A node is a hash
2728 from directory names to keys. Both the revs_db and the nodes_db are
2729 stored on disk and each access is expensive.
2731 The nodes_db database only has the keys for old revisions. The
2732 revision that is being contructed is kept in memory in the new_nodes
2733 hash which is cheap to access.
2735 You must invoke _start_commit between SVNCommits.
2737 *** WARNING *** All path arguments to methods in this class CANNOT
2738 have leading or trailing slashes.
2741 class SVNRepositoryMirrorPathExistsError(Exception):
2742 """Exception raised if an attempt is made to add a path to the
2743 repository mirror and that path already exists in the youngest
2744 revision of the repository."""
2745 pass
2747 class SVNRepositoryMirrorUnexpectedOperationError(Exception):
2748 """Exception raised if a CVSRevision is found to have an unexpected
2749 operation (OP) value."""
2750 pass
2752 class SVNRepositoryMirrorInvalidFillOperationError(Exception):
2753 """Exception raised if an empty SymbolicNameFillingGuide is returned
2754 during a fill where the branch in question already exists."""
2755 pass
2757 def __init__(self):
2758 """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
2759 self.delegates = [ ]
2761 # This corresponds to the 'revisions' table in a Subversion fs.
2762 self.revs_db = Database(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
2763 Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)
2765 # This corresponds to the 'nodes' table in a Subversion fs. (We
2766 # don't need a 'representations' or 'strings' table because we
2767 # only track metadata, not file contents.)
2768 self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
2769 Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)
2771 # Start at revision 0 without a root node. It will be created
2772 # by _open_writable_root_node.
2773 self.youngest = 0
2774 self.new_root_key = None
2775 self.new_nodes = { }
2777 if not Ctx().trunk_only:
2778 ###PERF IMPT: Suck this into memory.
2779 self.tags_db = TagsDatabase(DB_OPEN_READ)
2780 self.symbolings_reader = SymbolingsReader()
2782 def _initialize_repository(self, date):
2783 """Initialize the repository by creating the directories for
2784 trunk, tags, and branches. This method should only be called
2785 after all delegates are added to the repository mirror."""
2786 # Make a 'fake' SVNCommit so we can take advantage of the revprops
2787 # magic therein
2788 svn_commit = SVNCommit("Initialization", 1)
2789 svn_commit.set_date(date)
2790 svn_commit.set_log_msg("New repository initialized by cvs2svn.")
2792 self._start_commit(svn_commit)
2793 self._mkdir(Ctx().trunk_base)
2794 if not Ctx().trunk_only:
2795 self._mkdir(Ctx().branches_base)
2796 self._mkdir(Ctx().tags_base)
2798 def _start_commit(self, svn_commit):
2799 """Start a new commit."""
2800 if self.youngest > 0:
2801 self._end_commit()
2803 self.youngest = svn_commit.revnum
2804 self.new_root_key = None
2805 self.new_nodes = { }
2807 self._invoke_delegates('start_commit', svn_commit)
2809 def _end_commit(self):
2810 """Called at the end of each commit. This method copies the newly
2811 created nodes to the on-disk nodes db."""
2812 if self.new_root_key is None:
2813 # No changes were made in this revision, so we make the root node
2814 # of the new revision be the same as the last one.
2815 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
2816 else:
2817 self.revs_db[str(self.youngest)] = self.new_root_key
2818 # Copy the new nodes to the nodes_db
2819 for key, value in self.new_nodes.items():
2820 self.nodes_db[key] = value
2822 def _get_node(self, key):
2823 """Returns the node contents for KEY which may refer to either
2824 self.nodes_db or self.new_nodes."""
2825 if self.new_nodes.has_key(key):
2826 return self.new_nodes[key]
2827 else:
2828 return self.nodes_db[key]
2830 def _open_readonly_node(self, path, revnum):
2831 """Open a readonly node for PATH at revision REVNUM. Returns the
2832 node key and node contents if the path exists, else (None, None)."""
2833 # Get the root key
2834 if revnum == self.youngest:
2835 if self.new_root_key is None:
2836 node_key = self.revs_db[str(self.youngest - 1)]
2837 else:
2838 node_key = self.new_root_key
2839 else:
2840 node_key = self.revs_db[str(revnum)]
2842 for component in path.split('/'):
2843 node_contents = self._get_node(node_key)
2844 if not node_contents.has_key(component):
2845 return None
2846 node_key = node_contents[component]
2848 return node_key
2850 def _open_writable_root_node(self):
2851 """Open a writable root node. The current root node is returned
2852 immeditely if it is already writable. If not, create a new one by
2853 copying the contents of the root node of the previous version."""
2854 if self.new_root_key is not None:
2855 return self.new_root_key, self.new_nodes[self.new_root_key]
2857 if self.youngest < 2:
2858 new_contents = { }
2859 else:
2860 new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
2861 self.new_root_key = gen_key()
2862 self.new_nodes = { self.new_root_key: new_contents }
2864 return self.new_root_key, new_contents
2866 def _open_writable_node(self, svn_path, create):
2867 """Open a writable node for the path SVN_PATH, creating SVN_PATH
2868 and any missing directories if CREATE is True."""
2869 parent_key, parent_contents = self._open_writable_root_node()
2871 # Walk up the path, one node at a time.
2872 path_so_far = None
2873 components = svn_path.split('/')
2874 for i in range(len(components)):
2875 component = components[i]
2876 this_key = this_contents = None
2877 path_so_far = _path_join(path_so_far, component)
2878 if parent_contents.has_key(component):
2879 # The component exists.
2880 this_key = parent_contents[component]
2881 if self.new_nodes.has_key(this_key):
2882 this_contents = self.new_nodes[this_key]
2883 else:
2884 # Suck the node from the nodes_db, but update the key
2885 this_contents = self.nodes_db[this_key]
2886 this_key = gen_key()
2887 self.new_nodes[this_key] = this_contents
2888 parent_contents[component] = this_key
2889 elif create:
2890 # The component does not exists, so we create it.
2891 this_contents = { }
2892 this_key = gen_key()
2893 self.new_nodes[this_key] = this_contents
2894 parent_contents[component] = this_key
2895 if i < len(components) - 1:
2896 self._invoke_delegates('mkdir', path_so_far)
2897 else:
2898 # The component does not exists and we are not instructed to
2899 # create it, so we give up.
2900 return None, None
2902 parent_key = this_key
2903 parent_contents = this_contents
2905 return this_key, this_contents
2907 def _path_exists(self, path):
2908 """If PATH exists in self.youngest of the svn repository mirror,
2909 return true, else return None.
2911 PATH must not start with '/'."""
2912 return self._open_readonly_node(path, self.youngest) is not None
2914 def _fast_delete_path(self, parent_path, parent_contents, component):
2915 """Delete COMPONENT from the parent direcory PARENT_PATH with the
2916 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
2917 in PARENT_CONTENTS."""
2918 if parent_contents.has_key(component):
2919 del parent_contents[component]
2920 self._invoke_delegates('delete_path', _path_join(parent_path, component))
2922 def _delete_path(self, svn_path, should_prune=False):
2923 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
2924 all ancestor directories that are made empty when SVN_PATH is deleted.
2925 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
2927 NOTE: This function does *not* allow you delete top-level entries
2928 (like /trunk, /branches, /tags), nor does it prune upwards beyond
2929 those entries."""
2930 pos = svn_path.rfind('/')
2931 parent_path = svn_path[:pos]
2932 entry = svn_path[pos+1:]
2933 parent_key, parent_contents = self._open_writable_node(parent_path, False)
2934 if parent_key is not None:
2935 self._fast_delete_path(parent_path, parent_contents, entry)
2936 # The following recursion makes pruning an O(n^2) operation in the
2937 # worst case (where n is the depth of SVN_PATH), but the worst case
2938 # is probably rare, and the constant cost is pretty low. Another
2939 # drawback is that we issue a delete for each path and not just
2940 # a single delete for the topmost directory pruned.
2941 if (should_prune and len(parent_contents) == 0 and
2942 parent_path.find('/') != -1):
2943 self._delete_path(parent_path, True)
2945 def _mkdir(self, path):
2946 """Create PATH in the repository mirror at the youngest revision."""
2947 self._open_writable_node(path, True)
2948 self._invoke_delegates('mkdir', path)
2950 def _change_path(self, cvs_rev):
2951 """Register a change in self.youngest for the CVS_REV's svn_path
2952 in the repository mirror."""
2953 # We do not have to update the nodes because our mirror is only
2954 # concerned with the presence or absence of paths, and a file
2955 # content change does not cause any path changes.
2956 self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, 0))
2958 def _add_path(self, cvs_rev):
2959 """Add the CVS_REV's svn_path to the repository mirror."""
2960 self._open_writable_node(cvs_rev.svn_path, True)
2961 self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, 1))
2963 def _copy_path(self, src_path, dest_path, src_revnum):
2964 """Copy SRC_PATH at subversion revision number SRC_REVNUM to
2965 DEST_PATH. In the youngest revision of the repository, DEST_PATH's
2966 parent *must* exist, but DEST_PATH *cannot* exist.
2968 Return the node key and the contents of the new node at DEST_PATH
2969 as a dictionary."""
2970 # get the contents of the node of our src_path
2971 src_key = self._open_readonly_node(src_path, src_revnum)
2972 src_contents = self._get_node(src_key)
2974 # Get the parent path and the base path of the dest_path
2975 pos = dest_path.rindex('/')
2976 dest_parent = dest_path[:pos]
2977 dest_basename = dest_path[pos+1:]
2978 dest_parent_key, dest_parent_contents = \
2979 self._open_writable_node(dest_parent, False)
2981 if dest_parent_contents.has_key(dest_basename):
2982 msg = "Attempt to add path '%s' to repository mirror " % dest_path
2983 msg = msg + "when it already exists in the mirror."
2984 raise self.SVNRepositoryMirrorPathExistsError, msg
2986 dest_parent_contents[dest_basename] = src_key
2987 self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)
2989 # Yes sir, src_key and src_contents are also the contents of the
2990 # destination. This is a cheap copy, remember! :-)
2991 return src_key, src_contents
2993 def _fill_symbolic_name(self, svn_commit):
2994 """Performs all copies necessary to create as much of the the tag
2995 or branch SVN_COMMIT.symbolic_name as possible given the current
2996 revision of the repository mirror.
2998 The symbolic name is guaranteed to exist in the Subversion
2999 repository by the end of this call, even if there are no paths
3000 under it."""
3001 symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
3002 svn_commit.symbolic_name, self.youngest)
3004 # Create the list of sources for the symbolic name. All source
3005 # prefixes must be direct sources for the destination, i.e. we
3006 # must have 'trunk' and 'branches/my_branch' and not just
3007 # 'branches'.
3008 sources = []
3009 for entry, key in symbol_fill.node_tree[symbol_fill.root_key].items():
3010 if entry == Ctx().trunk_base:
3011 sources.append(FillSource(entry, key))
3012 elif entry == Ctx().branches_base:
3013 for entry2, key2 in symbol_fill.node_tree[key].items():
3014 sources.append(FillSource(entry + '/' + entry2, key2))
3015 else:
3016 raise # Should never happen
3017 if self.tags_db.has_key(svn_commit.symbolic_name):
3018 dest_prefix = _path_join(Ctx().tags_base, svn_commit.symbolic_name)
3019 else:
3020 dest_prefix = _path_join(Ctx().branches_base,
3021 svn_commit.symbolic_name)
3023 if sources:
3024 dest_key = self._open_writable_node(dest_prefix, False)[0]
3025 self._fill(symbol_fill, dest_prefix, dest_key, sources)
3026 else:
3027 # We can only get here for a branch whose first commit is an add
3028 # (as opposed to a copy).
3029 dest_path = Ctx().branches_base + '/' + symbol_fill.name
3030 if not self._path_exists(dest_path):
3031 # If our symbol_fill was empty, that means that our first
3032 # commit on the branch was to a file added on the branch, and
3033 # that this is our first fill of that branch.
3035 # This case is covered by test 16.
3037 # ...we create the branch by copying trunk from the our
3038 # current revision number minus 1
3039 source_path = Ctx().trunk_base
3040 entries = self._copy_path(source_path, dest_path,
3041 svn_commit.revnum - 1)[1]
3042 # Now since we've just copied trunk to a branch that's
3043 # *supposed* to be empty, we delete any entries in the
3044 # copied directory.
3045 for entry in entries.keys():
3046 del_path = dest_path + '/' + entry
3047 # Delete but don't prune.
3048 self._delete_path(del_path)
3049 else:
3050 msg = "Error filling branch '" + symbol_fill.name + "'.\n"
3051 msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
3052 msg = msg + "attempted to create a branch that already exists."
3053 raise self.SVNRepositoryMirrorInvalidFillOperationError, msg
3055 def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
3056 path = None, parent_source_prefix = None,
3057 preferred_revnum = None, prune_ok = None):
3058 """Fill the tag or branch at DEST_PREFIX + PATH with items from
3059 SOURCES, and recurse into the child items.
3061 DEST_PREFIX is the prefix of the destination directory, e.g.
3062 '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
3063 FillSource classes that are candidates to be copied to the
3064 destination. DEST_KEY is the key in self.nodes_db to the
3065 destination, or None if the destination does not yet exist.
3067 PATH is the path relative to DEST_PREFIX. If PATH is None, we
3068 are at the top level, e.g. '/tags/my_tag'.
3070 PARENT_SOURCE_PREFIX is the source prefix that was used to copy
3071 the parent directory, and PREFERRED_REVNUM is an int which is the
3072 source revision number that the caller (who may have copied KEY's
3073 parent) used to perform its copy. If PREFERRED_REVNUM is None,
3074 then no revision is preferable to any other (which probably means
3075 that no copies have happened yet).
3077 PRUNE_OK means that a copy has been made in this recursion, and
3078 it's safe to prune directories that are not in
3079 SYMBOL_FILL.node_tree, provided that said directory has a source
3080 prefix of one of the PARENT_SOURCE_PREFIX.
3082 PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
3083 should only be passed in by recursive calls."""
3084 # Calculate scores and revnums for all sources
3085 for source in sources:
3086 src_revnum, score = symbol_fill.get_best_revnum(source.key,
3087 preferred_revnum)
3088 source.set_score(score, src_revnum)
3090 # Sort the sources in descending score order so that we will make
3091 # a eventual copy from the source with the highest score.
3092 sources.sort()
3093 copy_source = sources[0]
3095 src_path = _path_join(copy_source.prefix, path)
3096 dest_path = _path_join(dest_prefix, path)
3098 # Figure out if we shall copy to this destination and delete any
3099 # destination path that is in the way.
3100 do_copy = 0
3101 if dest_key is None:
3102 do_copy = 1
3103 elif prune_ok and (parent_source_prefix != copy_source.prefix or
3104 copy_source.revnum != preferred_revnum):
3105 # We are about to replace the destination, so we need to remove
3106 # it before we perform the copy.
3107 self._delete_path(dest_path)
3108 do_copy = 1
3110 if do_copy:
3111 dest_key, dest_entries = self._copy_path(src_path, dest_path,
3112 copy_source.revnum)
3113 prune_ok = 1
3114 else:
3115 dest_entries = self._get_node(dest_key)
3117 # Create the SRC_ENTRIES hash from SOURCES. The keys are path
3118 # elements and the values are lists of FillSource classes where
3119 # this path element exists.
3120 src_entries = {}
3121 for source in sources:
3122 for entry, key in symbol_fill.node_tree[source.key].items():
3123 if entry[0] == '/': # Skip flags
3124 continue
3125 if not src_entries.has_key(entry):
3126 src_entries[entry] = []
3127 src_entries[entry].append(FillSource(source.prefix, key))
3129 if prune_ok:
3130 # Delete the entries in DEST_ENTRIES that are not in src_entries.
3131 delete_list = [ ]
3132 for entry in dest_entries.keys():
3133 if not src_entries.has_key(entry):
3134 delete_list.append(entry)
3135 if delete_list:
3136 if not self.new_nodes.has_key(dest_key):
3137 dest_key, dest_entries = self._open_writable_node(dest_path, True)
3138 # Sort the delete list to get "diffable" dumpfiles.
3139 delete_list.sort()
3140 for entry in delete_list:
3141 self._fast_delete_path(dest_path, dest_entries, entry)
3143 # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
3144 src_keys = src_entries.keys()
3145 src_keys.sort()
3146 for src_key in src_keys:
3147 if dest_entries.has_key(src_key):
3148 next_dest_key = dest_entries[src_key]
3149 else:
3150 next_dest_key = None
3151 self._fill(symbol_fill, dest_prefix, next_dest_key,
3152 src_entries[src_key], _path_join(path, src_key),
3153 copy_source.prefix, sources[0].revnum, prune_ok)
3155 def _synchronize_default_branch(self, svn_commit):
3156 """Propagate any changes that happened on a non-trunk default
3157 branch to the trunk of the repository. See
3158 CVSCommit._post_commit() for details on why this is necessary."""
3159 for cvs_rev in svn_commit.cvs_revs:
3160 if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
3161 if self._path_exists(cvs_rev.svn_trunk_path):
3162 # Delete the path on trunk...
3163 self._delete_path(cvs_rev.svn_trunk_path)
3164 # ...and copy over from branch
3165 self._copy_path(cvs_rev.svn_path, cvs_rev.svn_trunk_path,
3166 svn_commit.motivating_revnum)
3167 elif cvs_rev.op == OP_DELETE:
3168 # delete trunk path
3169 self._delete_path(cvs_rev.svn_trunk_path)
3170 else:
3171 msg = ("Unknown CVSRevision operation '%s' in default branch sync."
3172 % cvs_rev.op)
3173 raise self.SVNRepositoryMirrorUnexpectedOperationError, msg
3175 def commit(self, svn_commit):
3176 """Add an SVNCommit to the SVNRepository, incrementing the
3177 Repository revision number, and changing the repository. Invoke
3178 the delegates' _start_commit() method."""
3180 if svn_commit.revnum == 2:
3181 self._initialize_repository(svn_commit.get_date())
3183 self._start_commit(svn_commit)
3185 if svn_commit.symbolic_name:
3186 Log().write(LOG_VERBOSE, "Filling symbolic name:",
3187 svn_commit.symbolic_name)
3188 self._fill_symbolic_name(svn_commit)
3189 elif svn_commit.motivating_revnum:
3190 Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
3191 % svn_commit.motivating_revnum)
3192 self._synchronize_default_branch(svn_commit)
3193 else: # This actually commits CVSRevisions
3194 if len(svn_commit.cvs_revs) > 1: plural = "s"
3195 else: plural = ""
3196 Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
3197 % (len(svn_commit.cvs_revs), plural))
3198 for cvs_rev in svn_commit.cvs_revs:
3199 # See comment in CVSCommit._commit() for what this is all
3200 # about. Note that although asking self._path_exists() is
3201 # somewhat expensive, we only do it if the first two (cheap)
3202 # tests succeed first.
3203 if not ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
3204 and (cvs_rev.rev == "1.1.1.1")
3205 and self._path_exists(cvs_rev.svn_path)):
3206 if cvs_rev.op == OP_ADD:
3207 self._add_path(cvs_rev)
3208 elif cvs_rev.op == OP_CHANGE:
3209 # Fix for Issue #74:
3211 # Here's the scenario. You have file FOO that is imported
3212 # on a non-trunk vendor branch. So in r1.1 and r1.1.1.1,
3213 # the file exists.
3215 # Moving forward in time, FOO is deleted on the default
3216 # branch (r1.1.1.2). cvs2svn determines that this delete
3217 # also needs to happen on trunk, so FOO is deleted on
3218 # trunk.
3220 # Along come r1.2, whose op is OP_CHANGE (because r1.1 is
3221 # not 'dead', we assume it's a change). However, since
3222 # our trunk file has been deleted, svnadmin blows up--you
3223 # can't change a file that doesn't exist!
3225 # Soooo... we just check the path, and if it doesn't
3226 # exist, we do an add... if the path does exist, it's
3227 # business as usual.
3228 if not self._path_exists(cvs_rev.svn_path):
3229 self._add_path(cvs_rev)
3230 else:
3231 self._change_path(cvs_rev)
3233 if cvs_rev.op == OP_DELETE:
3234 self._delete_path(cvs_rev.svn_path, Ctx().prune)
def cleanup(self):
  """Drop this object's references to its two databases.

  Registered as a Cleanup callback by self.__init__ (per the original
  comment; the registration itself is outside this block)."""
  # Chained assignment binds left to right: revs_db first, then nodes_db.
  self.revs_db = self.nodes_db = None
def add_delegate(self, delegate):
  """Register DELEGATE at the end of self.delegates.

  Delegates are invoked by _invoke_delegates() in the order in which
  they were registered here.  See SVNRepositoryMirrorDelegate for the
  methods a delegate is expected to provide."""
  registered = self.delegates
  registered.append(delegate)
def _invoke_delegates(self, method, *args):
  """Call the method named METHOD, with positional arguments ARGS, on
  every delegate in self.delegates, in registration order."""
  for delegate in self.delegates:
    # Look the bound method up by name, then call it.
    callback = getattr(delegate, method)
    callback(*args)
def finish(self):
  """Wrap up the conversion: end the commit in progress, tell every
  delegate to finish, then release our own resources."""
  # The order matters: the commit is ended before the delegates are
  # told to finish, and cleanup() runs last.
  self._end_commit()
  self._invoke_delegates('finish')
  self.cleanup()
class SVNCommitItem:
  """Pairs a CVSRevision with the Subversion properties (self.svn_props)
  that should accompany it when it is written out."""

  def __init__(self, c_rev, make_svn_props):
    """Wrap C_REV and compute self.svn_props.

    If MAKE_SVN_PROPS is false and Ctx().cvs_revnums is false, the
    property dictionary is left empty.  Otherwise the 'svn:' properties
    are derived from C_REV and the Ctx() options read below."""
    self.c_rev = c_rev
    self.set_cvs_revnum_properties = Ctx().cvs_revnums
    self.eol_from_mime_type = Ctx().eol_from_mime_type
    self.no_default_eol = Ctx().no_default_eol
    self.keywords_off = Ctx().keywords_off
    self.mime_mapper = Ctx().mime_mapper

    # Start out with at most the "CVS revision" property.  Recording it
    # also forces the 'svn:' properties to be generated.
    self.svn_props = { }
    if self.set_cvs_revnum_properties:
      self.svn_props['cvs2svn:cvs-rev'] = c_rev.rev
      make_svn_props = True

    # Nothing more to do unless the 'svn:' properties were asked for.
    if not make_svn_props:
      return

    props = self.svn_props

    # Executable bit.
    if c_rev.file_executable:
      props['svn:executable'] = '*'

    # Keyword expansion, unless switched off.  See issue #2.
    if c_rev.mode in (None, 'kv', 'kvl') and not self.keywords_off:
      props['svn:keywords'] = 'Author Date Id Revision'

    # Mime type and eol style depend on each other; see also issue #39.
    mime_type = None
    eol_style = None

    if self.mime_mapper:
      mime_type = self.mime_mapper.get_type_from_filename(c_rev.cvs_path)

    if c_rev.mode == 'b':
      # Binary ('kb') file: fall back to a generic binary mime type when
      # the mapper did not supply one.
      if mime_type is None:
        mime_type = 'application/octet-stream'
    elif not self.no_default_eol:
      eol_style = 'native'
    elif (mime_type and self.eol_from_mime_type
          and mime_type.startswith("text/")):
      eol_style = 'native'

    if mime_type:
      props['svn:mime-type'] = mime_type

    if eol_style:
      props['svn:eol-style'] = eol_style
class SVNRepositoryMirrorDelegate:
  """Interface that every SVNRepositoryMirror delegate must implement.

  Each method stands for one Subversion operation, and each subclass
  carries it out in its own way.  Taking add_path as the example: the
  DumpfileDelegate writes a "Node-add:" command to a Subversion
  dumpfile, the StdoutDelegate only prints that the path is being
  added, and the RepositoryDelegate causes the path to be added to the
  Subversion repository it is creating.

  Every method here raises NotImplementedError."""

  def start_commit(self, svn_commit):
    """Begin SVN_COMMIT (an SVNCommit); the details are up to the
    subclass."""
    raise NotImplementedError

  def mkdir(self, path):
    """PATH is a string naming a directory; the details are up to the
    subclass."""
    raise NotImplementedError

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem to be added; the details are up to
    the subclass."""
    raise NotImplementedError

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem to be changed; the details are up to
    the subclass."""
    raise NotImplementedError

  def delete_path(self, path):
    """PATH is a string naming the path to delete; the details are up
    to the subclass."""
    raise NotImplementedError

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are strings and SRC_REVNUM is a Subversion
    revision number (int); the details are up to the subclass."""
    raise NotImplementedError

  def finish(self):
    """Do whatever cleanup is needed once all revisions have been
    committed."""
    raise NotImplementedError
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
  """Create a Subversion dumpfile."""

  def __init__(self, dumpfile_path=None):
    """Return a new DumpfileDelegate instance, attached to a dumpfile
    DUMPFILE_PATH (Ctx().dumpfile, if None), using Ctx().encoding.

    The file properties written by this delegate are the ones carried
    by each SVNCommitItem (its svn_props dictionary); see
    SVNCommitItem.__init__ for how 'cvs2svn:cvs-rev', 'svn:mime-type',
    'svn:eol-style' and friends are chosen.

    The dumpfile is opened for writing (truncating any existing file)
    and the dumpfile format header is written to it immediately."""
    if dumpfile_path:
      self.dumpfile_path = dumpfile_path
    else:
      self.dumpfile_path = Ctx().dumpfile
    self.path_encoding = Ctx().encoding

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)

  def _write_dumpfile_header(self, dumpfile):
    """Write the standard dumpfile header to the file-like DUMPFILE."""
    # Since the CVS repository doesn't have a UUID, and the Subversion
    # repository will be created with one anyway, we don't specify a
    # UUID in the dumpfile.
    dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

  def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8.  PATH is assumed to be
    encoded in self.path_encoding.

    If PATH cannot be decoded, print a hint on stdout and exit with
    status 1."""
    try:
      # Log messages can be converted with the 'replace' strategy,
      # but we can't afford any lossiness here.
      unicode_path = unicode(path, self.path_encoding, 'strict')
      return unicode_path.encode('utf-8')
    except UnicodeError:
      sys.stdout.write("Unable to convert a path '%s' to internal encoding."
                       "\n" % path)
      sys.stdout.write("Consider rerunning with (for example) "
                       "'--encoding=latin1'\n")
      sys.exit(1)

  def start_commit(self, svn_commit):
    """Emit the start of SVN_COMMIT (an SVNCommit)."""

    self.revision = svn_commit.revnum

    # The start of a new commit typically looks like this:
    #
    #   Revision-number: 1
    #   Prop-content-length: 129
    #   Content-length: 129
    #
    #   K 7
    #   svn:log
    #   V 27
    #   Log message for revision 1.
    #   K 10
    #   svn:author
    #   V 7
    #   jrandom
    #   K 8
    #   svn:date
    #   V 27
    #   2003-04-22T22:57:58.132837Z
    #   PROPS-END
    #
    # Notice that the length headers count everything -- not just the
    # length of the data but also the lengths of the lengths, including
    # the 'K ' or 'V ' prefixes.
    #
    # The reason there are both Prop-content-length and Content-length
    # is that the former includes just props, while the latter includes
    # everything.  That's the generic header form for any entity in a
    # dumpfile.  But since revisions only have props, the two lengths
    # are always the same for revisions.

    # Calculate the total length of the props section.  Properties
    # whose value is None are skipped, both here and when writing.
    props = svn_commit.get_revprops()
    prop_names = props.keys()
    prop_names.sort()
    total_len = 10  # len('PROPS-END\n')
    for propname in prop_names:
      if props[propname] is None:
        continue
      klen = len(propname)
      klen_len = len('K %d' % klen)
      vlen = len(props[propname])
      vlen_len = len('V %d' % vlen)
      # + 4 for the four newlines within a given property's section
      total_len = total_len + klen + klen_len + vlen + vlen_len + 4

    # Print the revision header and props
    self.dumpfile.write('Revision-number: %d\n'
                        'Prop-content-length: %d\n'
                        'Content-length: %d\n'
                        '\n'
                        % (self.revision, total_len, total_len))

    for propname in prop_names:
      if props[propname] is None:
        continue
      self.dumpfile.write('K %d\n'
                          '%s\n'
                          'V %d\n'
                          '%s\n' % (len(propname),
                                    propname,
                                    len(props[propname]),
                                    props[propname]))

    self.dumpfile.write('PROPS-END\n')
    self.dumpfile.write('\n')

  def mkdir(self, path):
    """Emit the creation of directory PATH."""
    # NOTE(review): a "Content-length: 10" header is emitted without
    # any content following it; left untouched here -- confirm against
    # the dumpfile loader before changing.
    self.dumpfile.write("Node-path: %s\n"
                        "Node-kind: dir\n"
                        "Node-action: add\n"
                        "Content-length: 10\n"
                        "\n"
                        "\n" % self._utf8_path(path))

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.
    OP is either the constant OP_ADD or OP_CHANGE.

    The file text is read from the pipe returned by get_co_pipe() and
    streamed into the dumpfile; the length and checksum headers are
    written as placeholders first and patched afterwards.  Exits the
    program if OP is invalid or if the checkout command fails."""

    # Validation stuffs
    if op == OP_ADD:
      action = 'add'
    elif op == OP_CHANGE:
      action = 'change'
    else:
      sys.stderr.write("%s: _add_or_change_path() called with bad op ('%s')\n"
                       % (error_prefix, op))
      sys.exit(1)

    # Convenience variables
    c_rev = s_item.c_rev
    svn_props = s_item.svn_props

    # The property handling here takes advantage of an undocumented
    # but IMHO consistent feature of the Subversion dumpfile-loading
    # code.  When a node's properties aren't mentioned (that is, the
    # "Prop-content-length:" header is absent, no properties are
    # listed at all, and there is no "PROPS-END\n" line) then no
    # change is made to the node's properties.
    #
    # This is consistent with the way dumpfiles behave w.r.t. text
    # content changes, so I'm comfortable relying on it.  If you
    # commit a change to *just* the properties of some node that
    # already has text contents from a previous revision, then in the
    # dumpfile output for the prop change, no "Text-content-length:"
    # nor "Text-content-md5:" header will be present, and the text of
    # the file will not be given.  But this does not cause the file's
    # text to be erased!  It simply remains unchanged.
    #
    # This works out great for cvs2svn, due to lucky coincidences:
    #
    # For files, the only properties we ever set are set in the first
    # revision; all other revisions (including on branches) inherit
    # from that.  After the first revision, we never change file
    # properties, therefore, there is no need to remember the full set
    # of properties on a given file once we've set it.
    #
    # For directories, the only property we set is "svn:ignore", and
    # while we may change it after the first revision, we always do so
    # based on the contents of a ".cvsignore" file -- in other words,
    # CVS is doing the remembering for us, so we still don't have to
    # preserve the previous value of the property ourselves.

    # Calculate the (sorted-by-name) property string and length, if any.
    prop_contents = ''
    prop_names = svn_props.keys()
    prop_names.sort()
    for pname in prop_names:
      pval = svn_props[pname]
      prop_contents = prop_contents + \
                      'K %d\n%s\nV %d\n%s\n' \
                      % (len(pname), pname, len(pval), pval)
    if prop_contents:
      prop_contents = prop_contents + 'PROPS-END\n'
      props_len = len(prop_contents)
    else:
      props_len = 0

    props_header = ''
    if props_len:
      props_header = 'Prop-content-length: %d\n' % props_len

    # treat .cvsignore as a directory property
    dir_path, basename = os.path.split(c_rev.svn_path)
    if basename == ".cvsignore":
      ignore_vals = generate_ignores(c_rev)
      ignore_contents = '\n'.join(ignore_vals)
      ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
                         (len(ignore_contents), ignore_contents))
      ignore_contents = ignore_contents + 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # write headers, then props
      self.dumpfile.write('Node-path: %s\n'
                          'Node-kind: dir\n'
                          'Node-action: change\n'
                          'Prop-content-length: %d\n'
                          'Content-length: %d\n'
                          '\n'
                          '%s'
                          % (self._utf8_path(dir_path), ignore_len,
                             ignore_len, ignore_contents))

    pipe_cmd, pipe = get_co_pipe(c_rev)
    self.dumpfile.write('Node-path: %s\n'
                        'Node-kind: file\n'
                        'Node-action: %s\n'
                        '%s'  # no property header if no props
                        'Text-content-length: '
                        % (self._utf8_path(c_rev.svn_path),
                           action, props_header))

    # Remember where the placeholders start so we can patch them later.
    pos = self.dumpfile.tell()

    self.dumpfile.write('0000000000000000\n'
                        'Text-content-md5: 00000000000000000000000000000000\n'
                        'Content-length: 0000000000000000\n'
                        '\n')

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Insert the rev contents, calculating length and checksum as we go.
    checksum = md5.new()
    length = 0
    normalize_crlf = sys.platform == "win32" \
                     and svn_props.has_key('svn:eol-style')
    # A CR that ended the previous chunk; it is held back because the
    # next chunk might begin with the LF that completes a CRLF pair.
    pending_cr = ""
    while 1:
      chunk = pipe.fromchild.read(PIPE_READ_SIZE)
      buf = pending_cr + chunk
      pending_cr = ""
      if not buf:
        break
      if normalize_crlf:
        buf = buf.replace("\r\n", "\n")
        # Only hold a trailing CR back while more data may follow.  At
        # end of input (empty CHUNK) it is written out as-is; holding
        # it back there would re-read it forever.
        if chunk and buf[-1] == "\r":
          pending_cr = "\r"
          buf = buf[:-1]
      checksum.update(buf)
      length = length + len(buf)
      self.dumpfile.write(buf)
      if not chunk:
        break
    pipe.fromchild.close()
    error_output = pipe.childerr.read()
    exit_status = pipe.wait()
    if exit_status:
      sys.exit("%s: The command '%s' failed with exit status: %s\n"
               "and the following output:\n"
               "%s" % (error_prefix, pipe_cmd, exit_status, error_output))

    # Go back to patch up the length and checksum headers:
    self.dumpfile.seek(pos, 0)
    # We left 16 zeros for the text length; replace them with the real
    # length, padded on the left with spaces:
    self.dumpfile.write('%16d' % length)
    # 16... + 1 newline + len('Text-content-md5: ') == 35
    self.dumpfile.seek(pos + 35, 0)
    self.dumpfile.write(checksum.hexdigest())
    # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
    self.dumpfile.seek(pos + 84, 0)
    # The content length is the length of property data, text data,
    # and any metadata around/inside around them.
    self.dumpfile.write('%16d' % (length + props_len))
    # Jump back to the end of the stream
    self.dumpfile.seek(0, 2)

    # This record is done (write two newlines -- one to terminate
    # contents that weren't themselves newline-terminated, one to
    # provide a blank line for readability).
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_path(self, path):
    """Emit the deletion of PATH."""
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: delete\n'
                        '\n' % self._utf8_path(path))

  def copy_path(self, src_path, dest_path, src_revnum):
    """Emit the copying of SRC_PATH at SRC_REVNUM to DEST_PATH."""
    # We don't need to include "Node-kind:" for copies; the loader
    # ignores it anyway and just uses the source kind instead.
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: add\n'
                        'Node-copyfrom-rev: %d\n'
                        'Node-copyfrom-path: /%s\n'
                        '\n'
                        % (self._utf8_path(dest_path),
                           src_revnum,
                           self._utf8_path(src_path)))

  def finish(self):
    """Perform any cleanup necessary after all revisions have been
    committed (here: close the dumpfile)."""
    self.dumpfile.close()
class RepositoryDelegate(DumpfileDelegate):
  """Creates a new Subversion Repository.  DumpfileDelegate does all
  of the heavy lifting: each commit is written to a temporary dumpfile
  which is then fed to an 'svnadmin load' child process."""

  def __init__(self):
    """Create the target repository (unless Ctx().existing_svnrepos is
    set), open the temporary dumpfile, and start 'svnadmin load'."""
    self.svnadmin = Ctx().svnadmin
    self.target = Ctx().target
    if not Ctx().existing_svnrepos:
      Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
      if Ctx().fs_type and Ctx().fs_type != 'bdb':
        # User specified something other than bdb.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--fs-type=%s" % Ctx().fs_type,
                                           self.target))
      elif Ctx().fs_type:
        # User explicitly specified bdb.
        #
        # Since this is a BDB repository, pass --bdb-txn-nosync,
        # because it gives us a 4-5x speed boost (if cvs2svn is
        # creating the repository, cvs2svn should be the only program
        # accessing the svn repository (until cvs is done, at least)).
        # But we'll turn no-sync off in self.finish(), unless
        # instructed otherwise.
        run_command('%s create %s %s "%s"' % (self.svnadmin,
                                              "--fs-type=bdb",
                                              "--bdb-txn-nosync",
                                              self.target))
      else:
        # User didn't say what kind repository (bdb, fsfs, etc).
        # We still pass --bdb-txn-nosync.  It's a no-op if the default
        # repository type doesn't support it, but we definitely want
        # it if BDB is the default.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--bdb-txn-nosync",
                                           self.target))

    # Since the output of this run is a repository, not a dumpfile,
    # the temporary dumpfiles we create should go in the tmpdir.
    DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

    # This is 1 if a commit is in progress, otherwise None.
    self._commit_in_progress = None

    # The base class opened the dumpfile write-only and wrote the
    # format header into it.  We need a read/write handle instead, and
    # the header goes straight to the loader pipe below.  Close the
    # first handle explicitly before reopening (which truncates), so
    # that its buffered header cannot be flushed into the new file.
    self.dumpfile.close()
    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = Popen3('%s load -q "%s"' % (self.svnadmin, self.target),
                              True)
    self.loader_pipe.fromchild.close()
    try:
      self._write_dumpfile_header(self.loader_pipe.tochild)
    except IOError:
      self._report_loader_failure()

  def _report_loader_failure(self):
    """Print svnadmin's stderr output and exit with status 1."""
    sys.stderr.write("%s: svnadmin failed with the following output while "
                     "loading the dumpfile:\n" % (error_prefix))
    sys.stderr.write(self.loader_pipe.childerr.read())
    sys.exit(1)

  def _feed_pipe(self):
    """Feed the revision stored in the dumpfile to the svnadmin
    load pipe."""
    self.dumpfile.seek(0)
    while 1:
      data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
      if not len(data):
        break
      try:
        self.loader_pipe.tochild.write(data)
      except IOError:
        self._report_loader_failure()

  def start_commit(self, svn_commit):
    """Start a new commit.  If a commit is already in progress, load
    the dumpfile's contents into the svn repository and empty the
    dumpfile before writing the new revision header into it."""
    if self._commit_in_progress:
      self._feed_pipe()
      self.dumpfile.seek(0)
      self.dumpfile.truncate()
    DumpfileDelegate.start_commit(self, svn_commit)
    self._commit_in_progress = 1

  def finish(self):
    """Load the last commit into the repository, wait for svnadmin to
    exit, remove the temporary dumpfile, and re-enable BDB transaction
    syncing if we were the ones who turned it off.

    Exits the program if 'svnadmin load' reports a non-zero status."""
    self._feed_pipe()
    self.dumpfile.close()
    self.loader_pipe.tochild.close()
    error_output = self.loader_pipe.childerr.read()
    exit_status = self.loader_pipe.wait()
    if exit_status:
      sys.exit('%s: svnadmin load failed with exit status: %s\n'
               'and the following output:\n'
               '%s' % (error_prefix, exit_status, error_output))
    os.remove(self.dumpfile_path)

    # If this is a BDB repository, and we created the repository, and
    # Ctx().bdb_txn_nosync is not set, then comment out the
    # DB_TXN_NOSYNC line in the DB_CONFIG file, because txn syncing
    # should be on by default in BDB repositories.
    #
    # We determine if this is a BDB repository by looking for the
    # DB_CONFIG file, which doesn't exist in FSFS, rather than by
    # checking Ctx().fs_type.  That way this code will Do The Right
    # Thing in all circumstances.
    db_config = os.path.join(self.target, "db/DB_CONFIG")
    if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
        and os.path.exists(db_config)):
      no_sync = 'set_flags DB_TXN_NOSYNC\n'

      config_file = open(db_config, 'r')
      try:
        contents = config_file.readlines()
      finally:
        config_file.close()

      # If the line is not there, syncing is already on and there is
      # nothing to rewrite.
      if no_sync in contents:
        index = contents.index(no_sync)
        contents[index] = '# ' + no_sync
        config_file = open(db_config, 'w')
        try:
          config_file.writelines(contents)
        finally:
          config_file.close()
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """A delegate that touches nothing on disk: it only reports, through
  Log(), what the SVNRepositoryMirror is doing.  Every message claims
  that something is being done when all that is really happening is
  the claiming.  Kind of zen, really."""

  def __init__(self, total_revs):
    """TOTAL_REVS is the number shown as the total in the progress
    line written by start_commit()."""
    self.total_revs = total_revs

  def start_commit(self, svn_commit):
    """Report the Subversion revision number of the commit that is
    starting, preceded (at verbose level) by a separator line."""
    separator = "=" * 60
    Log().write(LOG_VERBOSE, separator)
    progress = "Starting Subversion commit %d / %d" % (svn_commit.revnum,
                                                       self.total_revs)
    Log().write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Report that directory PATH is being created."""
    Log().write(LOG_VERBOSE, " New Directory", path)

  def add_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being added."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Adding", svn_path)

  def change_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being changed."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Changing", svn_path)

  def delete_path(self, path):
    """Report that PATH is being deleted."""
    Log().write(LOG_VERBOSE, " Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Report, on two lines, that revision SRC_REVNUM of SRC_PATH is
    being copied to DEST_PATH."""
    Log().write(LOG_VERBOSE, " Copying revision", src_revnum, "of", src_path)
    Log().write(LOG_VERBOSE, " to", dest_path)

  def finish(self):
    """Report that the repository has been created."""
    Log().write(LOG_VERBOSE, "Finished creating Subversion repository.")
    Log().write(LOG_QUIET, "Done.")
# This should be a local to pass1,
# but Python 2.0 does not support nested scopes.
OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
def pass1():
  """Pass 1: walk Ctx().cvsroot and parse every ',v' file found into a
  CollectData instance.

  Exits the program if any fatal error was recorded, or if no ',v'
  file was found at all."""
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    """os.path.walk() callback; BATON is the CollectData instance."""
    cd = baton
    for fname in files:
      if fname[-2:] != ',v':
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname[-6:] == OS_SEP_PLUS_ATTIC:
        # drop the 'Attic' portion from the pathname for the canonical name.
        cd.set_fname(os.path.join(dirname[:-6], fname), pathname)
      else:
        # If this file also exists in the attic, it's a fatal error
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        cd.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)
      try:
        rcs_file = open(pathname, 'rb')
        try:
          cvs2svn_rcsparse.parse(rcs_file, cd)
        finally:
          # Don't rely on garbage collection to release the handle; a
          # repository can contain a great many files.
          rcs_file.close()
      except (cvs2svn_rcsparse.common.RCSParseError, ValueError, RuntimeError):
        err = "%s: '%s' is not a valid ,v file" \
              % (error_prefix, pathname)
        sys.stderr.write(err + '\n')
        cd.fatal_errors.append(err)
      except:
        # Say which file we were parsing, then let the error propagate.
        Log().write(LOG_WARN, "Exception occurred while parsing %s" % pathname)
        raise

  os.path.walk(Ctx().cvsroot, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if len(cd.fatal_errors) > 0:
    sys.exit("Pass 1 complete.\n" + "=" * 75 + "\n"
             + "Error summary:\n"
             + "\n".join(cd.fatal_errors)
             + "\nExited due to fatal error(s).")

  if cd.found_valid_file is None:
    sys.exit("\nNo RCS files found in your CVS Repository!\n"
             + "Are you absolutely certain you are pointing cvs2svn\n"
             + "at a CVS repository?\n"
             + "\nExited due to fatal error(s).")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass2():
  """Pass 2: clean up the revision information.

  Validates the symbol exclusions and forced tags/branches (exiting
  with status 1 if any check fails), creates the tags database, and
  then rewrites the revisions file into the clean-revs file with
  excluded symbols removed, forced symbols converted and timestamps
  resynchronized."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Convert the list of regexps to a list of strings
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write(" '%s'\n" % (blocker))
      sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write(" '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = filter(is_not_forced, mismatches)
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write(" '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags.keys():
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp.  We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory. really large repositories with whacky timestamps could
    ### bust this assumption. should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit.  We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped.  We also make them a list
    # because we will dynamically expand the lower/upper bound as we find
    # commits that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      if resync.has_key(digest):
        resync[digest].append([t1_l, t1_u, t2])
      else:
        resync[digest] = [ [t1_l, t1_u, t2] ]

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    digests = resync.keys()
    for digest in digests:
      (resync[digest]).sort()

    return resync

  resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))

  output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)

  # Predicate used to strip excluded tags and branches.  It does not
  # depend on the revision, so define it once rather than per line.
  # (EXCLUDES is bound as a default argument because Python 2.0 does
  # not support nested scopes.)
  def not_excluded(symbol, excludes=excludes):
    return not excludes.has_key(symbol)

  # process the revisions file, looking for items to clean up
  for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
    c_rev = CVSRevision(Ctx(), line[:-1])

    # Skip this entire revision if it's on an excluded branch
    if excludes.has_key(c_rev.branch_name):
      continue

    # Remove all references to excluded tags and branches
    c_rev.branches = filter(not_excluded, c_rev.branches)
    c_rev.tags = filter(not_excluded, c_rev.tags)

    # Convert all branches that are forced to be tags
    for forced_tag in Ctx().forced_tags:
      if forced_tag in c_rev.branches:
        c_rev.branches.remove(forced_tag)
        c_rev.tags.append(forced_tag)

    # Convert all tags that are forced to be branches
    for forced_branch in Ctx().forced_branches:
      if forced_branch in c_rev.tags:
        c_rev.tags.remove(forced_branch)
        c_rev.branches.append(forced_branch)

    # see if this is "near" any of the resync records we
    # have recorded for this digest [of the log message].
    for record in resync.get(c_rev.digest, []):
      if record[0] <= c_rev.timestamp <= record[1]:
        # bingo! remap the time on this (record[2] is the new time).

        # adjust the time range. we want the COMMIT_THRESHOLD from the
        # bounds of the earlier/latest commit in this group.
        record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
        record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

        # By default this will be the new timestamp
        new_timestamp = record[2]
        # If the new timestamp is earlier than that of our previous revision
        if record[2] < c_rev.prev_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is before previous the time of"
                  + " revision %s (%s):")
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, record[2],
                                        c_rev.prev_rev, c_rev.prev_timestamp))
          # If resyncing our rev to c_rev.prev_timestamp + 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted sync time, then sync back to c_rev.prev_timestamp
          # + 1...
          if (c_rev.prev_timestamp - record[2]) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.prev_timestamp + 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          # ...otherwise, make no change
          else:
            new_timestamp = c_rev.timestamp
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)

        msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
                 record[2] - c_rev.timestamp)
        Log().write(LOG_VERBOSE, msg)

        c_rev.timestamp = new_timestamp

        # stop looking for hits
        break

    output.write(str(c_rev) + "\n")

  # Flush and release the clean-revs file explicitly: the next pass
  # reads it, so don't depend on garbage collection to close it.
  output.close()
  Log().write(LOG_QUIET, "Done")
def pass3():
  """Sort the cleaned CVS revisions file.

  Runs sort_file() from DATAFILE + CLEAN_REVS_SUFFIX to
  DATAFILE + SORTED_REVS_SUFFIX (both inside the temporary directory)
  and registers the sorted file with Cleanup() against pass5."""
  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  unsorted_path = temp(DATAFILE + CLEAN_REVS_SUFFIX)
  sorted_path = temp(DATAFILE + SORTED_REVS_SUFFIX)
  sort_file(unsorted_path, sorted_path)
  # The sorted file is read again by pass4 and pass5, hence pass5 here.
  Cleanup().register(sorted_path, pass5)
  Log().write(LOG_QUIET, "Done")
def pass4():
  """Copy the sorted revisions from the flat file into a database.

  Every line of the sorted revisions file is turned into a CVSRevision
  and logged in a new CVSRevisionDatabase and in StatsKeeper().  Unless
  this is a trunk-only conversion, the LastSymbolicNameDatabase, which
  contains the last CVSRevision that is a source for each tag or
  branch, is generated as well.
  """
  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  revisions_db = CVSRevisionDatabase(DB_OPEN_NEW)

  if Ctx().trunk_only:
    # A do-nothing stand-in, so that Ctx().trunk_only does not have to
    # be tested every time around the loop below.
    class DummyLSNDB:
      def log_revision(self, *args):
        pass
      def create_database(self, *args):
        pass
    symbol_db = DummyLSNDB()
  else:
    Log().write(LOG_QUIET,
                "and finding last CVS revisions for all symbolic names...")
    symbol_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for raw_line in sorted_revs:
    # Drop the trailing newline before parsing the record.
    revision = CVSRevision(Ctx(), raw_line[:-1])
    revisions_db.log_revision(revision)
    symbol_db.log_revision(revision)
    StatsKeeper().record_c_rev(revision)

  symbol_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  Each sorted CVSRevision is handed to a CVSRevisionAggregator; in a
  trunk-only conversion, revisions that have a branch name are left
  out.  CVSCommit._commit also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.
  """
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for raw_line in sorted_revs:
    revision = CVSRevision(Ctx(), raw_line[:-1])
    if Ctx().trunk_only and revision.branch_name is not None:
      # Branch revisions are ignored when converting the trunk only.
      continue
    aggregator.process_revision(revision)
  aggregator.flush()

  # SVNCommit.revnum is one past the last revision number handed out.
  StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass6():
  """Sort the symbol openings/closings file.

  Does nothing beyond logging for a trunk-only conversion.  Otherwise
  SYMBOL_OPENINGS_CLOSINGS is sorted into SYMBOL_OPENINGS_CLOSINGS_SORTED
  and the sorted file is registered with Cleanup() against pass8."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  trunk_only = Ctx().trunk_only
  if not trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(sorted_path, pass8)
  Log().write(LOG_QUIET, "Done")
def pass7():
  """Record, for every symbolic name, where its entries start in the
  sorted openings/closings file.  Skipped (apart from the log
  messages) for a trunk-only conversion."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      # readline() is used instead of iterating over the file so that
      # tell() reports the offset of the line about to be read.
      while 1:
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only the symbol name is needed; unpacking all three fields
        # still rejects a line that does not have the expected layout.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      # Release the file handle even if a malformed line aborts the scan.
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Replay the stored SVNCommits through an SVNRepositoryMirror.

  A RepositoryDelegate (when Ctx().target is set) or a DumpfileDelegate
  (otherwise) is attached unless this is a dry run, followed by a
  StdoutDelegate.  Commits are then fetched from the PersistenceManager
  by increasing number until a false value comes back."""
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  if Ctx().target:
    make_delegate = RepositoryDelegate
    announcement = "Starting Subversion Repository."
  else:
    make_delegate = DumpfileDelegate
    announcement = "Starting Subversion Dumpfile."
  if not Ctx().dry_run:
    repos.add_delegate(make_delegate())
  # The announcement is logged for dry runs as well.
  Log().write(LOG_QUIET, announcement)

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  # Repository initialization is 1, so the stored commits start at 2.
  revnum = 2
  svn_commit = persistence_manager.get_svn_commit(revnum)
  while svn_commit:
    repos.commit(svn_commit)
    revnum += 1
    svn_commit = persistence_manager.get_svn_commit(revnum)

  repos.finish()
# The conversion passes, in execution order.  convert() runs entry
# (N - 1) for pass number N, and main() uses the length of this list
# as the highest pass number accepted by -p.
_passes = [
  pass1,
  pass2,
  pass3,
  pass4,
  pass5,
  pass6,
  pass7,
  pass8,
  ]
class Ctx:
  """Session state for this run of cvs2svn.  For example, run-time
  options are stored here.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.

  Every instance shares one attribute dictionary, so a value set
  through one Ctx() is visible through all the others.
  """

  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # An earlier instantiation already installed the defaults.
      return
    # Else, initialize to defaults.  (The lists are created here so
    # that they are fresh objects owned by the shared state.)
    self.__dict__.update({
      # Input and output locations, and external programs.
      'cvsroot': None,
      'target': None,
      'dumpfile': DUMPFILE,
      'tmpdir': '.',
      'use_cvs': None,
      'svnadmin': "svnadmin",
      # Verbosity and help.
      'verbose': 0,
      'quiet': 0,
      'print_help': 0,
      # What gets produced, and how.
      'prune': 1,
      'existing_svnrepos': 0,
      'dump_only': 0,
      'dry_run': 0,
      'skip_cleanup': 0,
      'bdb_txn_nosync': 0,
      'fs_type': None,
      # Repository layout.
      'trunk_only': 0,
      'trunk_base': "trunk",
      'tags_base': "tags",
      'branches_base': "branches",
      # Log messages, properties and commit metadata.
      'encoding': "ascii",
      'mime_types_file': None,
      'mime_mapper': None,
      'no_default_eol': 0,
      'eol_from_mime_type': 0,
      'keywords_off': 0,
      'username': None,
      'cvs_revnums': 0,
      # Symbol handling.
      'forced_branches': [],
      'forced_tags': [],
      'excludes': [],
      'symbol_transforms': [],
      })
class MimeMapper:
  """A class that provides mappings from file names to MIME types.
  Note that we should really be using Python's 'mimetypes' module.
  See http://cvs2svn.tigris.org/servlets/ReadMsg?list=dev&msgNo=266
  for more."""

  def __init__(self):
    # Maps an extension (without the dot), or a whole base name for
    # files that have no extension, to a MIME type string.
    self.mappings = { }

  def set_mime_types_file(self, mime_types_file):
    """Load the mappings found in MIME_TYPES_FILE into self.mappings.

    Lines starting with '#' and lines with fewer than two fields are
    ignored.  When an extension is listed for two different types a
    warning is written to stderr and the later type wins."""
    # A private FileInput is used rather than fileinput.input(): the
    # latter shares module-global state and fails with "input() already
    # active" while another such stream is being read.
    lines = fileinput.FileInput(mime_types_file)
    try:
      for line in lines:
        if line.startswith("#"):
          continue

        # format of a line is something like
        # text/plain c h cpp
        fields = line.split()
        if len(fields) < 2:
          continue
        mime_type = fields[0]
        for ext in fields[1:]:
          # Stored values are always strings, so None means "not seen".
          previous = self.mappings.get(ext)
          if previous is not None and previous != mime_type:
            sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n" \
                             % (warning_prefix, ext, previous, mime_type))
          self.mappings[ext] = mime_type
    finally:
      lines.close()

  def get_type_from_filename(self, filename):
    """Return the MIME type recorded for FILENAME, or None if there is
    no mapping for its extension (or, lacking one, its base name)."""
    basename, extension = os.path.splitext(os.path.basename(filename))

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename
    return self.mappings.get(extension)
def convert(start_pass, end_pass):
  """Convert a CVS repository to an SVN repository.

  Runs passes START_PASS through END_PASS (1-based, inclusive) from
  _passes, recording the duration of each one in StatsKeeper(), and
  finally logs the statistics and prints the timings."""

  cleanup = Cleanup()
  pass_started = time.time()
  StatsKeeper().set_start_time(time.time())
  for pass_number in range(start_pass, end_pass + 1):
    run_pass = _passes[pass_number - 1]
    Log().write(LOG_QUIET, '----- pass %d -----' % pass_number)
    run_pass()
    pass_finished = time.time()
    StatsKeeper().log_duration_for_pass(pass_finished - pass_started,
                                        pass_number)
    # The next pass is timed from the moment this one finished.
    pass_started = pass_finished

    # Dispose of items in Ctx() not intended to live past the end of the
    # pass (identified by exactly one leading underscore).  Names that
    # begin with "_Ctx__" are the class's own mangled privates and stay.
    for attr in dir(Ctx()):
      if len(attr) <= 2:
        continue
      if attr[0] != '_' or attr[1] == '_':
        continue
      if attr[:6] == "_Ctx__":
        continue
      delattr(Ctx(), attr)

    if not Ctx().skip_cleanup:
      cleanup.cleanup(run_pass)
  StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET, '(These are unaltered CVS repository stats and do not\n'
                + ' reflect tags or branches excluded via --exclude)\n')
  print(StatsKeeper().timings())
def usage():
  "Print the synopsis and the summary of all options to standard output."
  ctx = Ctx()
  help_lines = [
    'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path'
    % os.path.basename(sys.argv[0]),
    ' --help, -h print this usage message and exit with success',
    ' --version print the version number',
    ' -q quiet',
    ' -v verbose',
    ' -s PATH path for SVN repos',
    ' -p START[:END] start at pass START, end at pass END of %d'
    % len(_passes),
    ' If only START is given, run only pass START',
    ' (implicitly enables --skip-cleanup)',
    ' --existing-svnrepos load into existing SVN repository',
    ' --dumpfile=PATH name of intermediate svn dumpfile',
    ' --tmpdir=PATH directory to use for tmp data (default to cwd)',
    " --profile profile with 'hotshot' (into file cvs2svn.hotshot)",
    ' --dry-run do not create a repository or a dumpfile;',
    ' just print what would happen.',
    " --use-cvs use CVS instead of RCS 'co' to extract data",
    ' (only use this if having problems with RCS)',
    ' --svnadmin=PATH path to the svnadmin program',
    ' --trunk-only convert only trunk commits, not tags nor branches',
    ' --trunk=PATH path for trunk (default: %s)'
    % ctx.trunk_base,
    ' --branches=PATH path for branches (default: %s)'
    % ctx.branches_base,
    ' --tags=PATH path for tags (default: %s)'
    % ctx.tags_base,
    " --no-prune don't prune empty directories",
    " --dump-only just produce a dumpfile, don't commit to a repos",
    ' --encoding=ENC encoding of log messages in CVS repos (default: %s)'
    % ctx.encoding,
    ' --force-branch=NAME force NAME to be a branch',
    ' --force-tag=NAME force NAME to be a tag',
    ' --exclude=REGEXP exclude branches and tags matching REGEXP',
    ' --symbol-transform=P:S transform symbol names from P to S where P and S',
    ' use Python regexp and reference syntax respectively',
    ' --username=NAME username for cvs2svn-synthesized commits',
    ' --skip-cleanup prevent the deletion of intermediate files',
    ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"',
    ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"',
    ' --cvs-revnums record CVS revision numbers as file properties',
    # This entry deliberately spans two output lines.
    ' --mime-types=FILE specify an apache-style mime.types file for\n'
    ' setting svn:mime-type',
    ' --eol-from-mime-type set svn:eol-style by mime type (only with --mime-types)',
    " --no-default-eol don't set svn:eol-style by CVS defaults",
    " --keywords-off don't set svn:keywords on any files (cvs2svn sets",
    ' "svn:keywords to author date id" on non-binary files',
    ' by default)',
    ]
  for help_line in help_lines:
    print(help_line)
def main():
  """Command-line entry point: parse options, validate them and convert.

  Reads sys.argv, stores the settings on the shared Ctx() instance,
  checks the option combination for consistency, takes the
  'cvs2svn.lock' directory lock inside the temporary directory and
  runs convert() for the selected range of passes.  The lock is removed
  again when the conversion ends, successfully or not.

  Exits through sys.exit(): status 0 after --help or --version, status
  1 (or an error message) for any usage or environment problem."""
  # Convenience var, so we don't have to keep instantiating this Borg.
  ctx = Ctx()

  profiling = None
  start_pass = 1
  end_pass = len(_passes)

  try:
    opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
                               [ "help", "create", "trunk=",
                                 "username=", "existing-svnrepos",
                                 "branches=", "tags=", "encoding=",
                                 "force-branch=", "force-tag=", "exclude=",
                                 "use-cvs", "mime-types=",
                                 "eol-from-mime-type", "no-default-eol",
                                 "trunk-only", "no-prune", "dry-run",
                                 "dump-only", "dumpfile=", "tmpdir=",
                                 "svnadmin=", "skip-cleanup", "cvs-revnums",
                                 "bdb-txn-nosync", "fs-type=",
                                 "version", "profile",
                                 "keywords-off", "symbol-transform="])
  except getopt.GetoptError:
    # Fetched through sys.exc_info() so that the handler parses under
    # every Python version.
    e = sys.exc_info()[1]
    sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
    usage()
    sys.exit(1)

  for opt, value in opts:
    if opt == '--version':
      print('%s version %s' % (os.path.basename(sys.argv[0]), VERSION))
      sys.exit(0)
    elif opt == '-p':
      # Don't cleanup if we're doing incrementals.
      ctx.skip_cleanup = 1
      try:
        if value.find(':') > 0:
          start_pass, end_pass = map(int, value.split(':'))
        else:
          end_pass = start_pass = int(value)
      except ValueError:
        # Non-numeric pass numbers, or more than one ':' in the value:
        # report it like any other usage error instead of a traceback.
        sys.stderr.write("%s: invalid value '%s' for -p: expected START "
                         "or START:END with integer pass numbers.\n"
                         % (error_prefix, value))
        sys.exit(1)
      if start_pass > len(_passes) or start_pass < 1:
        print('%s: illegal value (%d) for starting pass. '
              'must be 1 through %d.' % (error_prefix, int(start_pass),
                                         len(_passes)))
        sys.exit(1)
      if end_pass < start_pass or end_pass > len(_passes):
        print('%s: illegal value (%d) for ending pass. '
              'must be %d through %d.' % (error_prefix, int(end_pass),
                                          int(start_pass), len(_passes)))
        sys.exit(1)
    elif (opt == '--help') or (opt == '-h'):
      ctx.print_help = 1
    elif opt == '-v':
      Log().log_level = LOG_VERBOSE
      ctx.verbose = 1
    elif opt == '-q':
      Log().log_level = LOG_QUIET
      ctx.quiet = 1
    elif opt == '-s':
      ctx.target = value
    elif opt == '--existing-svnrepos':
      ctx.existing_svnrepos = 1
    elif opt == '--dumpfile':
      ctx.dumpfile = value
    elif opt == '--tmpdir':
      ctx.tmpdir = value
    elif opt == '--use-cvs':
      ctx.use_cvs = 1
    elif opt == '--svnadmin':
      ctx.svnadmin = value
    elif opt == '--trunk-only':
      ctx.trunk_only = 1
    elif opt == '--trunk':
      if not value:
        sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
      ctx.trunk_base = value
    elif opt == '--branches':
      if not value:
        sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
      ctx.branches_base = value
    elif opt == '--tags':
      if not value:
        sys.exit("%s: cannot pass an empty path to %s." % (error_prefix, opt))
      ctx.tags_base = value
    elif opt == '--no-prune':
      ctx.prune = None
    elif opt == '--dump-only':
      ctx.dump_only = 1
    elif opt == '--dry-run':
      ctx.dry_run = 1
    elif opt == '--encoding':
      ctx.encoding = value
    elif opt == '--force-branch':
      ctx.forced_branches.append(value)
    elif opt == '--force-tag':
      ctx.forced_tags.append(value)
    elif opt == '--exclude':
      # The pattern has to match the whole symbol name.
      try:
        ctx.excludes.append(re.compile('^' + value + '$'))
      except re.error:
        sys.exit(error_prefix + ": '%s' is not a valid regexp.\n" % (value))
    elif opt == '--mime-types':
      ctx.mime_types_file = value
    elif opt == '--eol-from-mime-type':
      ctx.eol_from_mime_type = 1
    elif opt == '--no-default-eol':
      ctx.no_default_eol = 1
    elif opt == '--keywords-off':
      ctx.keywords_off = 1
    elif opt == '--username':
      ctx.username = value
    elif opt == '--skip-cleanup':
      ctx.skip_cleanup = 1
    elif opt == '--cvs-revnums':
      ctx.cvs_revnums = 1
    elif opt == '--bdb-txn-nosync':
      ctx.bdb_txn_nosync = 1
    elif opt == '--fs-type':
      ctx.fs_type = value
    elif opt == '--create':
      sys.stderr.write(warning_prefix +
          ': The behaviour produced by the --create option is now the '
          'default,\nand passing the option is deprecated.\n')
    elif opt == '--profile':
      profiling = 1
    elif opt == '--symbol-transform':
      ctx.symbol_transforms.append(value.split(":"))

  if ctx.print_help:
    usage()
    sys.exit(0)

  # Consistency check for options and arguments.
  if len(args) == 0:
    usage()
    sys.exit(1)

  if len(args) > 1:
    sys.stderr.write(error_prefix +
                     ": must pass only one CVS repository.\n")
    usage()
    sys.exit(1)

  ctx.cvsroot = args[0]

  if not os.path.isdir(ctx.cvsroot):
    sys.stderr.write(error_prefix +
                     ": the given CVS repository path '%s' is not an "
                     "existing directory.\n" % ctx.cvsroot)
    sys.exit(1)

  if ctx.use_cvs:
    # Ascend above the specified root if necessary, to find the cvs_repository
    # (a directory containing a CVSROOT directory) and the cvs_module (the
    # path of the conversion root within the cvs repository)
    # NB: cvs_module must be separated by '/' *not* by os.sep .
    ctx.cvs_repository = os.path.abspath(ctx.cvsroot)
    prev_cvs_repository = None
    ctx.cvs_module = ""
    while prev_cvs_repository != ctx.cvs_repository:
      if os.path.isdir(os.path.join(ctx.cvs_repository, 'CVSROOT')):
        break
      prev_cvs_repository = ctx.cvs_repository
      ctx.cvs_repository, module_component = os.path.split(ctx.cvs_repository)
      ctx.cvs_module = module_component + "/" + ctx.cvs_module
    else:
      # Hit the root (of the drive, on Windows) without finding a CVSROOT dir.
      sys.stderr.write(error_prefix +
                       ": the path '%s' is not a CVS repository, nor a path " \
                       "within a CVS repository. A CVS repository contains " \
                       "a CVSROOT directory within its root directory.\n" \
                       % ctx.cvsroot)
      sys.exit(1)
    os.environ['CVSROOT'] = ctx.cvs_repository

  if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
    sys.stderr.write(error_prefix +
                     ": must pass one of '-s' or '--dump-only'.\n")
    sys.exit(1)

  def not_both(opt1val, opt1name, opt2val, opt2name):
    "Exit with an error if both of two exclusive options were given."
    if opt1val and opt2val:
      sys.stderr.write(error_prefix + ": cannot pass both '%s' and '%s'.\n" \
                       % (opt1name, opt2name))
      sys.exit(1)

  not_both(ctx.target, '-s',
           ctx.dump_only, '--dump-only')

  not_both(ctx.dump_only, '--dump-only',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.dump_only, '--dump-only',
           ctx.bdb_txn_nosync, '--bdb-txn-nosync')

  not_both(ctx.quiet, '-q',
           ctx.verbose, '-v')

  not_both(ctx.fs_type, '--fs-type',
           ctx.existing_svnrepos, '--existing-svnrepos')

  if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
    sys.stderr.write(error_prefix +
                     ": cannot pass --bdb-txn-nosync with --fs-type=%s.\n" \
                     % ctx.fs_type)
    sys.exit(1)

  if ((ctx.trunk_base.find('/') > -1)
      or (ctx.tags_base.find('/') > -1)
      or (ctx.branches_base.find('/') > -1)):
    sys.stderr.write("%s: cannot pass multicomponent path to "
                     "--trunk, --tags, or --branches yet.\n"
                     " See http://cvs2svn.tigris.org/issues/show_bug.cgi?"
                     "id=7 for details.\n" % error_prefix)
    sys.exit(1)

  if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
    sys.stderr.write(error_prefix +
                     ": the svn-repos-path '%s' is not an "
                     "existing directory.\n" % ctx.target)
    sys.exit(1)

  if not ctx.dump_only and not ctx.existing_svnrepos \
     and (not ctx.dry_run) and os.path.exists(ctx.target):
    sys.stderr.write(error_prefix +
                     ": the svn-repos-path '%s' exists.\nRemove it, or pass "
                     "'--existing-svnrepos'.\n" % ctx.target)
    sys.exit(1)

  if ctx.mime_types_file:
    ctx.mime_mapper = MimeMapper()
    ctx.mime_mapper.set_mime_types_file(ctx.mime_types_file)

  # Make sure the tmp directory exists.  Note that we don't check if
  # it's empty -- we want to be able to use, for example, "." to hold
  # tempfiles.  But if we *did* want check if it were empty, we'd do
  # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
  if not os.path.exists(ctx.tmpdir):
    os.mkdir(ctx.tmpdir)
  elif not os.path.isdir(ctx.tmpdir):
    sys.stderr.write(error_prefix +
       ": cvs2svn tried to use '%s' for temporary files, but that path\n"
       " exists and is not a directory. Please make it be a directory,\n"
       " or specify some other directory for temporary files.\n" \
       % ctx.tmpdir)
    sys.exit(1)

  if ctx.use_cvs:
    def cvs_ok():
      "Run 'cvs --version' with the current global arguments."
      pipe = Popen3('cvs %s --version' % Ctx().cvs_global_arguments, True)
      pipe.tochild.close()
      pipe.fromchild.read()
      errmsg = pipe.childerr.read()
      status = pipe.wait()
      # Any output on stderr counts as a failure, as does a bad status.
      ok = len(errmsg) == 0 and status == 0
      return (ok, status, errmsg)

    # Try with -R first and fall back to plain -q if that is rejected.
    ctx.cvs_global_arguments = "-q -R"
    ok, cvs_exitstatus, cvs_errmsg = cvs_ok()
    if not ok:
      ctx.cvs_global_arguments = "-q"
      ok, cvs_exitstatus, cvs_errmsg = cvs_ok()

    if not ok:
      sys.stderr.write(error_prefix +
                       ": error executing CVS: status %s, error output:\n" \
                       % (cvs_exitstatus) + cvs_errmsg)
      # --use-cvs was requested but cvs cannot be run: stop here rather
      # than start a conversion after reporting an error.
      sys.exit(1)

  # But do lock the tmpdir, to avoid process clash.
  try:
    os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
  except OSError:
    e = sys.exc_info()[1]
    if e.errno == errno.EACCES:
      sys.stderr.write(error_prefix + ": Permission denied:"
                       + " No write access to output directory.\n")
      sys.exit(1)
    if e.errno == errno.EEXIST:
      sys.stderr.write(error_prefix +
          ": cvs2svn is using directory '%s' for temporary files, but\n"
          " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
          " cvs2svn process is currently using '%s' as its temporary\n"
          " workspace. If you are certain that is not the case,\n"
          " then remove the '%s/cvs2svn.lock' subdirectory.\n" \
          % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir))
      sys.exit(1)
    # Anything else is unexpected; let it propagate.
    raise
  try:
    if profiling:
      import hotshot
      prof = hotshot.Profile('cvs2svn.hotshot')
      prof.runcall(convert, start_pass, end_pass)
      prof.close()
    else:
      convert(start_pass, end_pass)
  finally:
    # Releasing the lock is best effort, but only a failure of rmdir
    # itself is ignored; other exceptions are not swallowed.
    try:
      os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
    except OSError:
      pass
# Run the converter only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
  main()