Fix bug causing cvs2svn to claim "svnadmin load" failed even if it succeeded.
[cvs2svn.git] / cvs2svn.py
blob13947ec1ccf7a18cf05514a9a3bd0e9e0959c0aa
1 #!/usr/bin/env python
3 # cvs2svn: ...
5 # ====================================================================
6 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
8 # This software is licensed as described in the file COPYING, which
9 # you should have received as part of this distribution. The terms
10 # are also available at http://subversion.tigris.org/license-1.html.
11 # If newer versions of this license are posted there, you may use a
12 # newer version instead, at your option.
14 # This software consists of voluntary contributions made by many
15 # individuals. For exact contribution history, see the revision
16 # history and logs, available at http://cvs2svn.tigris.org/.
17 # ====================================================================
19 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
21 import cvs2svn_rcsparse
22 import os
23 import sys
24 import sha
25 import re
26 import time
27 import fileinput
28 import string
29 import getopt
30 import stat
31 import string
32 import md5
33 import marshal
34 import errno
35 import popen2
# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "Warning: ".
warning_prefix = "Warning"
error_prefix = "Error"

# Make sure this Python is recent enough.  The message is rendered as
# "Error: Python 2.0 or higher required, ..." (no leading quote).
if sys.hexversion < 0x2000000:
  sys.stderr.write("%s: Python 2.0 or higher required, "
                   "see www.python.org.\n" % error_prefix)
  sys.exit(1)
48 # Pretend we have true booleans on older python versions
49 try:
50 True
51 except:
52 True = 1
53 False = 0
# Minimal, incomplete, version of popen2.Popen3 for those platforms
# for which popen2 does not provide it.
try:
  Popen3 = popen2.Popen3
except AttributeError:
  class Popen3:
    """Fallback built on popen2.popen3() exposing the fromchild,
    tochild and childerr streams plus a wait() method."""

    def __init__(self, cmd, capturestderr):
      """Start CMD, which is either a command string or a sequence of
      words that are joined with single spaces.

      CAPTURESTDERR is accepted for signature compatibility only; this
      fallback does not consult it."""
      # isinstance() rather than an exact type comparison, so that a str
      # subclass is not mistaken for a sequence and joined char by char.
      if not isinstance(cmd, str):
        cmd = " ".join(cmd)
      self.fromchild, self.tochild, self.childerr = popen2.popen3(cmd,
                                                                  mode='b')

    def wait(self):
      """Close the three streams in order and return the first true
      value returned by a close() call, else the last close() result."""
      return self.fromchild.close() or self.tochild.close() or \
             self.childerr.close()
# DBM module selection

# 1. If bsddb3 is installed it is probably newer than bsddb, so alias it
#    as 'bsddb'; the dbhash module used by anydbm will then use bsddb3.
try:
  import bsddb3
except ImportError:
  pass
else:
  sys.modules['bsddb'] = sys.modules['bsddb3']

# 2. These DBM modules are not good for cvs2svn.
import anydbm
if anydbm._defaultmod.__name__ in ('dumbdbm', 'dbm'):
  for _line in (
      'ERROR: your installation of Python does not contain a suitable',
      ' DBM module. This script cannot continue.',
      ' to solve: see http://python.org/doc/current/lib/module-anydbm.html',
      ' for details.'):
    print(_line)
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then prefer gdbm instead.
#    Unfortunately, gdbm appears not to be trouble free, either.
if hasattr(anydbm._defaultmod, 'bsddb'):
  if not hasattr(anydbm._defaultmod.bsddb, '__version__'):
    try:
      gdbm = __import__('gdbm')
    except ImportError:
      # No gdbm to fall back on: keep the old module, but warn.
      sys.stderr.write(
        warning_prefix +
        ': The version of the bsddb module found '
        'on your computer has been reported to malfunction on some datasets, '
        'causing KeyError exceptions. You may wish to upgrade your Python to '
        'version 2.3 or later.\n')
    else:
      anydbm._defaultmod = gdbm
trunk_rev = re.compile(r'^[0-9]+\.[0-9]+$')
branch_tag = re.compile(r'^[0-9.]+\.0\.[0-9]+$')
vendor_tag = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+$')

# This really only matches standard '1.1.1.*'-style vendor revisions.
# One could conceivably have a file whose default branch is 1.1.3 or
# whatever, or was that at some point in time, with vendor revisions
# 1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now (which
# is the only time this regexp gets used), we'd have no basis for
# assuming that the non-standard vendor branch had ever been the
# default branch anyway, so we don't want this to match them anyway.
#
# Group 1 is the branch number ('1.1.1'); group 2 is the whole final
# component.  The repetition sits inside the group so that group 2 is
# e.g. '96' rather than only its last digit.
vendor_revision = re.compile(r'^(1\.1\.1)\.([0-9]+)$')
118 DATAFILE = 'cvs2svn-data'
119 DUMPFILE = 'cvs2svn-dump' # The "dumpfile" we create to load into the repos
121 # This text file contains records (1 per line) that describe svn
122 # filesystem paths that are the opening and closing source revisions
123 # for copies to tags and branches. The format is as follows:
125 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
127 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
128 # SVN_REVNUM are the primary and secondary sorting criteria for
129 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
130 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
131 # A sorted version of the above file.
132 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
134 # This file is a temporary file for storing symbolic_name -> closing
135 # CVSRevision until the end of our pass where we can look up the
136 # corresponding SVNRevNum for the closing revs and write these out to
137 # the SYMBOL_OPENINGS_CLOSINGS.
138 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
140 # Skeleton version of an svn filesystem.
141 # (These supersede and will eventually replace the two above.)
142 # See class SVNRepositoryMirror for how these work.
143 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
144 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
146 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
147 # SYMBOL_OPENINGS_CLOSINGS_SORTED
148 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
150 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
151 # the CVSRevision is the last such that is a source for those symbolic
152 # names. For example, if branch B's number is 1.3.0.2 in this CVS
153 # file, and this file's 1.3 is the latest (by date) revision among
154 # *all* CVS files that is a source for branch B, then the
155 # CVSRevision.unique_key() corresponding to this file at 1.3 would
156 # list at least B in its list.
157 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
159 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
160 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
161 ### the s-revs data in this database.
162 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
164 # Lists all symbolic names that are tags. Keys are strings (symbolic
165 # names), values are ignorable.
166 TAGS_DB = 'cvs2svn-tags.db'
168 # A list of all tags. Each line consists of the tag name and the number
169 # of files in which it exists, separated by a space.
170 TAGS_LIST = 'cvs2svn-tags.txt'
172 # A list of all branches. The file is stored as a plain text file
173 # to make it easy to look at in an editor. Each line contains the
174 # branch name, the number of files where the branch is created, the
175 # commit count, and a list of tags and branches that are defined on
176 # revisions in the branch.
177 BRANCHES_LIST = 'cvs2svn-branches.txt'
179 # These two databases provide a bidirectional mapping between
180 # CVSRevision.unique_key()s and Subversion revision numbers.
182 # The first maps CVSRevision.unique_key() to a number; the values are
183 # not unique.
185 # The second maps a number to a list of CVSRevision.unique_key()s.
186 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
187 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
189 # This database maps svn_revnums to tuples of (symbolic_name, date).
191 # The svn_revnums are the revision numbers of all non-primary
192 # SVNCommits. No primary SVNCommit has a key in this database.
194 # The date is stored for all commits in this database.
196 # For commits that fill symbolic names, the symbolic_name is stored.
197 # For commits that are default branch syncs, the symbolic_name is None.
198 SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'
200 # This database maps svn_revnums of a default branch synchronization
201 # commit to the svn_revnum of the primary SVNCommit that motivated it.
203 # (NOTE: Secondary commits that fill branches and tags also have a
204 # motivating commit, but we do not record it because it is (currently)
205 # not needed for anything.)
207 # This mapping is used when generating the log message for the commit
208 # that synchronizes the default branch with trunk.
209 MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'
211 # How many bytes to read at a time from a pipe. 128 kiB should be
212 # large enough to be efficient without wasting too much memory.
213 PIPE_READ_SIZE = 128 * 1024
215 # Record the default RCS branches, if any, for CVS filepaths.
217 # The keys are CVS filepaths, relative to the top of the repository
218 # and with the ",v" stripped off, so they match the cvs paths used in
219 # Commit.commit(). The values are vendor branch revisions, such as
220 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
221 # represents the highest vendor branch revision thought to have ever
222 # been head of the default branch.
224 # The reason we record a specific vendor revision, rather than a
225 # default branch number, is that there are two cases to handle:
227 # One case is simple. The RCS file lists a default branch explicitly
228 # in its header, such as '1.1.1'. In this case, we know that every
229 # revision on the vendor branch is to be treated as head of trunk at
230 # that point in time.
232 # But there's also a degenerate case. The RCS file does not currently
233 # have a default branch, yet we can deduce that for some period in the
234 # past it probably *did* have one. For example, the file has vendor
235 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
236 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
237 # case, we should record 1.1.1.96 as the last vendor revision to have
238 # been the head of the default branch.
239 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
241 # Records the author and log message for each changeset.
242 # The keys are author+log digests, the same kind used to identify
243 # unique revisions in the .revs, etc files. Each value is a tuple
244 # of two elements: '(author logmessage)'.
245 METADATA_DB = "cvs2svn-metadata.db"
247 REVS_SUFFIX = '.revs'
248 CLEAN_REVS_SUFFIX = '.c-revs'
249 SORTED_REVS_SUFFIX = '.s-revs'
250 RESYNC_SUFFIX = '.resync'
252 SVN_INVALID_REVNUM = -1
254 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
256 # Things that can happen to a file.
257 OP_NOOP = '-'
258 OP_ADD = 'A'
259 OP_DELETE = 'D'
260 OP_CHANGE = 'C'
262 # A deltatext either does or doesn't represent some change.
263 DELTATEXT_NONEMPTY = 'N'
264 DELTATEXT_EMPTY = 'E'
266 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
268 # Constants used in SYMBOL_OPENINGS_CLOSINGS
269 OPENING = 'O'
270 CLOSING = 'C'
272 # Officially, CVS symbolic names must use a fairly restricted set of
273 # characters. Unofficially, CVS 1.10 allows any character but [$,.:;@]
274 # We don't care if some repositories out there use characters outside the
275 # official set, as long as their tags start with a letter.
276 # Since the unofficial set also includes [/\] we need to translate those
277 # into ones that don't conflict with Subversion limitations.
278 symbolic_name_re = re.compile('^[a-zA-Z].*$')
280 def _clean_symbolic_name(name):
281 """Return symbolic name NAME, translating characters that Subversion
282 does not allow in a pathname."""
283 name = name.replace('/',',')
284 name = name.replace('\\',';')
285 return name
287 def _path_join(*components):
288 """Join two or more pathname COMPONENTS, inserting '/' as needed.
289 Empty component are skipped."""
290 return string.join(filter(None, components), '/')
def run_command(command):
  """Run COMMAND through os.system(); exit the program with a
  'Command failed' message if it returns a non-zero status."""
  status = os.system(command)
  if not status:
    return
  sys.exit('Command failed: "%s"' % command)
def relative_name(cvsroot, fname):
  """Return FNAME relative to CVSROOT, using '/' as the separator.

  FNAME must start with CVSROOT.  One os.sep directly after the CVSROOT
  prefix is dropped, and every remaining os.sep is replaced by '/'.  If
  FNAME equals CVSROOT the result is the empty string.

  If FNAME does not start with CVSROOT, an error is written to stderr
  and the program exits with status 1."""
  l = len(cvsroot)
  if fname[:l] == cvsroot:
    rest = fname[l:]
    # Slice rather than index, so that FNAME == CVSROOT (empty REST)
    # does not raise IndexError.
    if rest[:1] == os.sep:
      rest = rest[1:]
    return rest.replace(os.sep, '/')
  sys.stderr.write("%s: relative_name('%s', '%s'): fname is not a sub-path of"
                   " cvsroot\n" % (error_prefix, cvsroot, fname))
  sys.exit(1)
# Return a string that has not been returned by gen_key() before.
# The key is the lowercase hexadecimal form of a module-level counter
# that is incremented on every call.
gen_key_base = 0
def gen_key():
  global gen_key_base
  current = gen_key_base
  gen_key_base = current + 1
  return '%x' % current
# escape_shell_arg(str) returns STR quoted as a single shell argument.
# The quoting rules differ per platform, so the function is defined
# conditionally.
if sys.platform == "win32":
  def escape_shell_arg(str):
    # Wrap in double quotes; each embedded '"' becomes '"^""'.
    return '"%s"' % str.replace('"', '"^""')
else:
  def escape_shell_arg(str):
    # Wrap in single quotes; each embedded "'" becomes "'\''".
    return "'%s'" % str.replace("'", "'\\''")
def format_date(date):
  """Return an svn-compatible date string for DATE (seconds since epoch).

  DATE is converted with time.gmtime(); the fractional part is always
  written as six zeros."""
  # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
  utc_fields = time.gmtime(date)
  return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc_fields)
def sort_file(infile, outfile):
  """Sort INFILE into OUTFILE by running the external 'sort' command.

  The command runs with LC_ALL set to 'C'; the previous value of LC_ALL
  (or its absence) is restored afterwards, even when run_command()
  terminates the program because the command failed."""
  # sort the log files

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
  # it to 'C'
  lc_all_tmp = os.environ.get('LC_ALL')
  os.environ['LC_ALL'] = 'C'
  try:
    run_command('sort %s > %s' % (infile, outfile))
  finally:
    # run_command() leaves via SystemExit on failure; restore the
    # caller's environment on that path as well.
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp
def print_node_tree(tree, root_node, indent_depth=0):
  """For debugging purposes.  Print every node of TREE reachable from
  ROOT_NODE, one per line, recursing into each child.

  INDENT_DEPTH controls the padding printed before the node and is
  increased by one per level; a banner line is printed first when it
  is zero.  Entries whose key starts with '/' are flags and are not
  followed."""
  if not indent_depth:
    print("TREE " + "=" * 75)
  padding = " " * (indent_depth * 2)
  entries = tree[root_node]
  print("TREE: %s %s %s" % (padding, root_node, entries))
  for key, child in entries.items():
    if key[0] != '/':  # Skip flags
      print_node_tree(tree, child, indent_depth + 1)
def match_regexp_list(regexp_list, string):
  """Return 1 if STRING is matched (via .match()) by at least one of
  the compiled regexps in REGEXP_LIST, else return None."""
  for pattern in regexp_list:
    if pattern.match(string) is not None:
      return 1
  return None
# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2
class Log:
  """A simple logging facility.  Each line will be timestamped if
  self.use_timestamps is true.  This class is a Borg: all instances
  share one attribute dictionary, so a setting changed through one
  instance is seen by every other instance."""
  __shared_state = {}
  def __init__(self):
    self.__dict__ = self.__shared_state
    # Only the first instance ever created finds the shared state
    # empty and initializes the defaults.
    if self.__dict__:
      return
    self.log_level = LOG_NORMAL
    # Set this to true if you want to see timestamps on each line output.
    self.use_timestamps = None
    self.logger = sys.stdout

  def _timestamp(self):
    """Output a detailed timestamp at the beginning of each line output."""
    # %M is minutes; %m (used here previously) is the month number.
    self.logger.write(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))

  def write(self, log_level, *args):
    """This is the public method to use for writing to a file.  Only
    messages whose LOG_LEVEL is <= self.log_level will be printed.  If
    there are multiple ARGS, they will be separated by a space."""
    if log_level > self.log_level:
      return
    if self.use_timestamps:
      self._timestamp()
    self.logger.write(' '.join(map(str, args)) + "\n")
class Cleanup:
  """This singleton class manages any files created by cvs2svn.

  Call register() with a filename and the last pass that needs the
  file; cleanup() for that pass then runs the file's optional callback
  and deletes the file.  This class is a Borg: every instance shares
  the same registrations."""

  __shared_state = {}
  def __init__(self):
    self.__dict__ = self.__shared_state
    if not self.__dict__:
      # First instance: create the shared tables.
      # _log maps pass -> {filename: 1}; _callbacks maps filename ->
      # callable.
      self._log = {}
      self._callbacks = {}

  def register(self, file, which_pass, callback=None):
    """Register FILE for cleanup at the end of WHICH_PASS, running
    function CALLBACK prior to removal.  Registering the same FILE for
    the same pass again has no further effect.

    Only the first callback registered for a file is kept; later ones
    are ignored.  Also note that if you register a database file you
    must close the database before cleanup, e.g. using a callback."""
    self._log.setdefault(which_pass, {})[file] = 1
    if callback and file not in self._callbacks:
      self._callbacks[file] = callback

  def cleanup(self, which_pass):
    """Clean up all files, and invoke callbacks, for pass WHICH_PASS.

    Does nothing if no file was registered for WHICH_PASS."""
    registered = self._log.get(which_pass)
    if registered is None:
      return
    for file in list(registered.keys()):
      Log().write(LOG_VERBOSE, "Deleting", file)
      callback = self._callbacks.get(file)
      if callback is not None:
        callback()
      os.unlink(file)
# Always use these constants for opening databases.
DB_OPEN_READ = 'r'
DB_OPEN_NEW = 'n'

# A wrapper for anydbm that uses the marshal module to store items as
# strings.
class Database:
  def __init__(self, filename, mode):
    """Open the anydbm database FILENAME in MODE and keep it in
    self.db."""
    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb - but *not* anydbm in general - the database
    # consists of one file with the name we specify, rather than several
    # based on that name).
    truncate_by_hand = (mode == 'n'
                        and anydbm._defaultmod.__name__ == 'dbhash')
    if truncate_by_hand:
      if os.path.isfile(filename):
        os.unlink(filename)
      mode = 'c'

    self.db = anydbm.open(filename, mode)

  def has_key(self, key):
    """Return whether KEY is present in the underlying database."""
    return self.db.has_key(key)

  def __getitem__(self, key):
    """Return the unmarshalled value stored under KEY."""
    stored = self.db[key]
    return marshal.loads(stored)

  def __setitem__(self, key, value):
    """Store VALUE under KEY in marshalled form."""
    self.db[key] = marshal.dumps(value)

  def __delitem__(self, key):
    """Remove KEY from the underlying database."""
    del self.db[key]

  def get(self, key, default):
    """Return the value stored under KEY, or DEFAULT if KEY is absent."""
    if not self.has_key(key):
      return default
    return self.__getitem__(key)

  def len(self):
    """Return the number of entries in the underlying database."""
    return len(self.db)
class LastSymbolicNameDatabase:
  """Passing every CVSRevision in s-revs to this class will result in
  a Database whose key is the last CVS Revision a symbolic name was
  seen in, and whose value is a list of all symbolic names that were
  last seen in that revision."""

  def __init__(self, mode):
    """Open SYMBOL_LAST_CVS_REVS_DB in MODE and register it for
    cleanup."""
    # Maps symbolic name -> unique_key() of the latest revision logged
    # so far that carries that name.
    self.symbols = {}
    self.symbol_revs_db = Database(SYMBOL_LAST_CVS_REVS_DB, mode)
    Cleanup().register(SYMBOL_LAST_CVS_REVS_DB, pass5)

  # Once we've gone through all the revs,
  # symbols.keys() will be a list of all tags and branches, and
  # their corresponding values will be a key into the last CVS revision
  # that they were used in.
  def log_revision(self, c_rev):
    """Record C_REV as the latest revision seen for each of its tags
    and, unless C_REV is a delete, for each of its branches."""
    # Gather last CVS Revision for symbolic name info and tag info
    key = c_rev.unique_key()
    for tag in c_rev.tags:
      self.symbols[tag] = key
    # Compare by value: the op may be a string parsed from a revs line
    # and is not guaranteed to be the same object as OP_DELETE.
    if c_rev.op != OP_DELETE:
      for branch in c_rev.branches:
        self.symbols[branch] = key

  # Creates an inversion of symbols above--a dictionary of lists (key
  # = CVS rev unique_key: val = list of symbols that close in that
  # rev.
  def create_database(self):
    """Write the inverse of self.symbols to self.symbol_revs_db."""
    for sym, rev_unique_key in self.symbols.items():
      # Values are stored by copy, so the updated list has to be
      # written back after appending.
      ary = self.symbol_revs_db.get(rev_unique_key, [])
      ary.append(sym)
      self.symbol_revs_db[rev_unique_key] = ary
class CVSRevisionDatabase:
  """A Database of CVSRevisions keyed by their unique_key().  Each
  revision is stored in its string form."""

  def __init__(self, mode):
    """Initialize an instance, opening database in MODE (like the MODE
    argument to Database or anydbm.open())."""
    self.cvs_revs_db = Database(CVS_REVS_DB, mode)
    Cleanup().register(CVS_REVS_DB, pass8)

  def log_revision(self, c_rev):
    """Add C_REV, a CVSRevision, to the database."""
    key = c_rev.unique_key()
    self.cvs_revs_db[key] = str(c_rev)

  def get_revision(self, unique_key):
    """Return the CVSRevision stored under UNIQUE_KEY, rebuilt from its
    stored string form with a Ctx() instance."""
    stored_line = self.cvs_revs_db[unique_key]
    return CVSRevision(Ctx(), stored_line)
class TagsDatabase(Database):
  """A Database recording which symbolic names are tags.

  Every key is a tag name.  The stored value carries no meaning and
  should be set to None."""

  def __init__(self, mode):
    """Open TAGS_DB in MODE and register it for cleanup."""
    Database.__init__(self, TAGS_DB, mode)
    cleaner = Cleanup()
    cleaner.register(TAGS_DB, pass8)
class CVSRevision:
  def __init__(self, ctx, *args):
    """Initialize a new CVSRevision with Ctx object CTX, and ARGS.

    If CTX is None, the following members and methods of the
    instantiated CVSRevision class object will be unavailable (or
    simply will not work correctly, if at all):
       cvs_path
       svn_path
       svn_trunk_path
       is_default_branch_revision()

    (Note that this class treats CTX as const, because the caller
    likely passed in a Borg instance of a Ctx.  The reason this class
    takes CTX as as a parameter, instead of just instantiating a Ctx
    itself, is that this class should be usable outside cvs2svn.py.)

    If there is one argument in ARGS, it is a string, in the format of
    a line from a revs file (the format produced by __str__()).  Do
    *not* include a trailing newline.

    If there are multiple ARGS, there must be 15 of them, in this
    order:
       timestamp       -->  (int) date stamp for this cvs revision
       digest          -->  (string) digest of author+logmsg
       op              -->  (char) OP_ADD, OP_CHANGE, or OP_DELETE
       prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
       rev             -->  (string) this CVS rev, e.g., "1.3"
       next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
       file_in_attic   -->  (char or None) true if RCS file is in Attic
       file_executable -->  (char or None) true if RCS file has exec bit set.
       file_size       -->  (int) size of the RCS file
       deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
       fname           -->  (string) relative path of file in CVS repos
       mode            -->  (string or None) "kkv", "kb", etc.
       branch_name     -->  (string or None) branch on which this rev occurred
       tags            -->  (list of strings) all tags on this revision
       branches        -->  (list of strings) all branches rooted in this rev

    Note that in the one-string form the filename comes last, after the
    tags and branches.

    The two forms of initialization are equivalent.

    Raise TypeError if ARGS has neither 1 nor 15 elements."""

    self._ctx = ctx
    if len(args) == 15:
      (self.timestamp, self.digest, self.op, self.prev_rev, self.rev,
       self.next_rev, self.file_in_attic, self.file_executable,
       self.file_size, self.deltatext_code, self.fname,
       self.mode, self.branch_name, self.tags, self.branches) = args
    elif len(args) == 1:
      def unstar(field):
        # "*" is how __str__() writes a value that is None (or false).
        if field == "*":
          return None
        return field

      data = args[0].split(' ', 13)
      self.timestamp = int(data[0], 16)
      self.digest = data[1]
      self.op = data[2]
      self.prev_rev = unstar(data[3])
      self.rev = data[4]
      self.next_rev = unstar(data[5])
      self.file_in_attic = unstar(data[6])
      self.file_executable = unstar(data[7])
      self.file_size = int(data[8])
      self.deltatext_code = data[9]
      self.mode = unstar(data[10])
      self.branch_name = unstar(data[11])
      # The tail is "<ntags> tag... <nbranches> branch... <fname>".
      # Each split is limited so that everything after the counted
      # items stays in one piece; FNAME may therefore contain spaces.
      ntags = int(data[12])
      tags = data[13].split(' ', ntags + 1)
      nbranches = int(tags[ntags])
      branches = tags[ntags + 1].split(' ', nbranches)
      self.fname = branches[nbranches]
      self.tags = tags[:ntags]
      self.branches = branches[:nbranches]
    else:
      # The counts in the message include CTX but not self.
      raise TypeError('CVSRevision() takes 2 or 16 arguments (%d given)'
                      % (len(args) + 1))
    if ctx is not None:
      # fname[:-2] drops the trailing ',v'.
      self.cvs_path = relative_name(self._ctx.cvsroot, self.fname[:-2])
      self.svn_path = self._make_path(self.cvs_path, self.branch_name)
      self.svn_trunk_path = self._make_path(self.cvs_path)

  # The 'primary key' of a CVS Revision is the revision number + the
  # filename.  To provide a unique key (say, for a dict), we just glom
  # them together in a string.  By passing in self.prev_rev or
  # self.next_rev, you can get the unique key for their respective
  # CVSRevisions.
  def unique_key(self, revnum=None):
    """Return REVNUM + "/" + self.fname; REVNUM defaults to self.rev."""
    if revnum is None:
      revnum = self.rev
    return revnum + "/" + self.fname

  def __str__(self):
    """Return this revision as a revs-file line (no trailing newline),
    writing "*" for the optional fields that are None or false."""
    return ('%08lx %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s' % (
      self.timestamp, self.digest, self.op,
      (self.prev_rev or "*"), self.rev, (self.next_rev or "*"),
      (self.file_in_attic or "*"), (self.file_executable or "*"),
      self.file_size,
      self.deltatext_code, (self.mode or "*"), (self.branch_name or "*"),
      len(self.tags), self.tags and " " or "", " ".join(self.tags),
      len(self.branches), self.branches and " " or "", " ".join(self.branches),
      self.fname, ))

  # Returns true if this CVSRevision is the opening CVSRevision for
  # NAME (for this RCS file).
  def opens_symbolic_name(self, name):
    """Return 1 if NAME is one of this revision's tags or branches,
    else 0."""
    if name in self.tags:
      return 1
    if name in self.branches:
      return 1
    return 0

  def is_default_branch_revision(self):
    """Return 1 if SELF.rev of SELF.cvs_path is a default branch
    revision according to DEFAULT_BRANCHES_DB (see the conditions
    documented there), else return None."""
    if self._ctx._default_branches_db.has_key(self.cvs_path):
      val = self._ctx._default_branches_db[self.cvs_path]
      val_last_dot = val.rindex(".")
      our_last_dot = self.rev.rindex(".")
      default_branch = val[:val_last_dot]
      our_branch = self.rev[:our_last_dot]
      default_rev_component = int(val[val_last_dot + 1:])
      our_rev_component = int(self.rev[our_last_dot + 1:])
      # Same branch, and not newer than the recorded vendor revision.
      if (default_branch == our_branch
          and our_rev_component <= default_rev_component):
        return 1
    # else
    return None

  def _make_path(self, path, branch_name = None):
    """Return the trunk path or branch path for PATH.

    If PATH is None, return None."""
    # For a while, we treated each top-level subdir of the CVS
    # repository as a "project root" and interpolated the appropriate
    # genealogy (trunk|tag|branch) in according to the official
    # recommended layout.  For example, the path '/foo/bar/baz.c' on
    # branch 'Rel2' would become
    #
    #   /foo/branches/Rel2/bar/baz.c
    #
    # and on trunk it would become
    #
    #   /foo/trunk/bar/baz.c
    #
    # However, we went back to the older and simpler method of just
    # prepending the genealogy to the front, instead of interpolating.
    # So now we produce:
    #
    #   /branches/Rel2/foo/bar/baz.c
    #   /trunk/foo/bar/baz.c
    #
    # Why?  Well, Jack Repenning pointed out that this way is much
    # friendlier to "anonymously rooted subtrees" (that's a tree where
    # the name of the top level dir doesn't matter, the point is that if
    # you cd into it and, say, run 'make', something good will happen).
    # By interpolating, we made it impossible to point cvs2svn at some
    # subdir in the CVS repository and convert it as a project, because
    # we'd treat every subdir underneath it as an independent project
    # root, which is probably not what the user wanted.
    #
    # Also, see Blair Zajac's post
    #
    #    http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=38965
    #
    # and the surrounding thread, for why what people really want is a
    # way of specifying an in-repository prefix path, not interpolation.
    if path is None:
      return None

    if branch_name:
      branch_name = _clean_symbolic_name(branch_name)
      return self._ctx.branches_base + '/' + branch_name + '/' + path
    else:
      return self._ctx.trunk_base + '/' + path

  def rcs_path(self):
    """Returns the actual filesystem path to the RCS file of this
    CVSRevision: self.fname itself, or with an 'Attic' directory
    inserted before the filename when file_in_attic is set."""
    if self.file_in_attic is None:
      return self.fname
    else:
      basepath, filename = os.path.split(self.fname)
      return os.path.join(basepath, 'Attic', filename)

  def filename(self):
    "Return the last path component of self.fname, minus the ',v'"
    return os.path.split(self.fname)[-1][:-2]
741 class SymbolDatabase:
742 """This database records information on all symbols in the RCS
743 files. It is created in pass 1 and it is used in pass 2."""
744 def __init__(self):
745 # A hash that maps tag names to commit counts
746 self.tags = { }
747 # A hash that maps branch names to lists of the format
748 # [ create_count, commit_count, blockers ], where blockers
749 # is a hash that lists the symbols that depend on the
750 # the branch. The blockers hash is used as a set, so the
751 # values are not used.
752 self.branches = { }
754 def register_tag_creation(self, name):
755 """Register the creation of the tag NAME."""
756 if not self.tags.has_key(name):
757 self.tags[name] = 0
758 self.tags[name] += 1
760 def _branch(self, name):
761 """Helper function to get a branch node that will create and
762 initialize the node if it does not exist."""
763 if not self.branches.has_key(name):
764 self.branches[name] = [ 0, 0, { } ]
765 return self.branches[name]
767 def register_branch_creation(self, name):
768 """Register the creation of the branch NAME."""
769 self._branch(name)[0] += 1
771 def register_branch_commit(self, name):
772 """Register a commit on the branch NAME."""
773 self._branch(name)[1] += 1
775 def register_branch_blocker(self, name, blocker):
776 """Register BLOCKER as a blocker on the branch NAME."""
777 self._branch(name)[2][blocker] = None
779 def branch_has_commit(self, name):
780 """Return non-zero if NAME has commits. Returns 0 if name
781 is not a branch or if it has no commits."""
782 return self.branches.has_key(name) and self.branches[name][1]
784 def find_excluded_symbols(self, regexp_list):
785 """Returns a hash of all symbols thaht match the regexps in
786 REGEXP_LISTE. The hash is used as a set so the values are
787 not used."""
788 excludes = { }
789 for tag in self.tags.keys():
790 if match_regexp_list(regexp_list, tag):
791 excludes[tag] = None
792 for branch in self.branches.keys():
793 if match_regexp_list(regexp_list, branch):
794 excludes[branch] = None
795 return excludes
797 def find_branch_exclude_blockers(self, branch, excludes):
798 """Find all blockers of BRANCH, excluding the ones in the hash
799 EXCLUDES."""
800 blockers = { }
801 if excludes.has_key(branch):
802 for blocker in self.branches[branch][2]:
803 if not excludes.has_key(blocker):
804 blockers[blocker] = None
805 return blockers
807 def find_blocked_excludes(self, excludes):
808 """Find all branches not in EXCLUDES that have blocking symbols that
809 are not themselves excluded. Return a hash that maps branch names
810 to a hash of blockers. The hash of blockes is used as a set so the
811 values are not used."""
812 blocked_branches = { }
813 for branch in self.branches.keys():
814 blockers = self.find_branch_exclude_blockers(branch, excludes)
815 if blockers:
816 blocked_branches[branch] = blockers
817 return blocked_branches
819 def find_mismatches(self, excludes=None):
820 """Find all symbols that are defined as both tags and branches,
821 excluding the ones in EXCLUDES. Returns a list of 4-tuples with
822 the symbol name, tag count, branch count and commit count."""
823 if excludes is None:
824 excludes = { }
825 mismatches = [ ]
826 for branch in self.branches.keys():
827 if not excludes.has_key(branch) and self.tags.has_key(branch):
828 mismatches.append((branch, # name
829 self.tags[branch], # tag count
830 self.branches[branch][0], # branch count
831 self.branches[branch][1])) # commit count
832 return mismatches
def read(self):
    """Read the symbol database from files.

    TAGS_LIST is parsed as one "<tag> <count>" pair per line and
    merged into self.tags.  BRANCHES_LIST is parsed as
    "<branch> <count> <count> [<blocker> ...]" per line and merged
    into self.branches as [int, int, {blocker: None, ...}].

    Each file is closed as soon as it has been consumed, even when a
    malformed line raises, so no handle is left for the garbage
    collector to clean up."""
    f = open(TAGS_LIST)
    try:
        while 1:
            line = f.readline()
            if not line:
                break
            tag, count = line.split()
            self.tags[tag] = int(count)
    finally:
        f.close()

    f = open(BRANCHES_LIST)
    try:
        while 1:
            line = f.readline()
            if not line:
                break
            words = line.split()
            self.branches[words[0]] = [int(words[1]), int(words[2]), {}]
            # Everything after the two counts names a blocker.
            for blocker in words[3:]:
                self.branches[words[0]][2][blocker] = None
    finally:
        f.close()
def write(self):
    """Store the symbol database to files.

    self.tags is written to TAGS_LIST as "<tag> <count>" lines, and
    self.branches to BRANCHES_LIST as
    "<branch> <count> <count> [<blocker> ...]" lines.

    Both files are closed explicitly so that the data is flushed to
    disk before this method returns, instead of whenever the file
    objects happen to be garbage-collected."""
    f = open(TAGS_LIST, "w")
    try:
        for tag, count in self.tags.items():
            f.write("%s %d\n" % (tag, count))
    finally:
        f.close()

    f = open(BRANCHES_LIST, "w")
    try:
        for branch, info in self.branches.items():
            f.write("%s %d %d" % (branch, info[0], info[1]))
            # Blockers, if any, follow the counts on the same line.
            if info[2]:
                f.write(" ")
                f.write(" ".join(info[2].keys()))
            f.write("\n")
    finally:
        f.close()
868 class CollectData(cvs2svn_rcsparse.Sink):
def __init__(self):
    """Open the files and databases that receive the collected data,
    and register each of them with Cleanup().

    NOTE(review): the second argument to Cleanup().register() is
    presumably the pass after which the file may be removed -- confirm
    against the Cleanup class."""
    # Plain in-memory bookkeeping.
    self.fatal_errors = []
    self.num_files = 0
    # 1 if we've collected data for at least one file, None otherwise.
    self.found_valid_file = None

    self.cvsroot = Ctx().cvsroot

    revs_path = DATAFILE + REVS_SUFFIX
    self.revs = open(revs_path, 'w')
    Cleanup().register(revs_path, pass2)

    resync_path = DATAFILE + RESYNC_SUFFIX
    self.resync = open(resync_path, 'w')
    Cleanup().register(resync_path, pass2)

    self.default_branches_db = Database(DEFAULT_BRANCHES_DB, DB_OPEN_NEW)
    Cleanup().register(DEFAULT_BRANCHES_DB, pass5)

    self.metadata_db = Database(METADATA_DB, DB_OPEN_NEW)
    Cleanup().register(METADATA_DB, pass8)

    self.symbol_db = SymbolDatabase()

    # The per-file attributes are initialized in set_fname().
def set_fname(self, canonical_name, filename):
    """Prepare to receive data for FILENAME.

    FILENAME is the absolute filesystem path to the file in question,
    and CANONICAL_NAME is FILENAME with the 'Attic' component removed
    (if the file is indeed in the Attic).  All per-file state kept on
    this object is reset here."""
    self.fname = canonical_name

    # File metadata is computed here, once per file, rather than later
    # on, where it would have to be recomputed for every CVS
    # *revision*.

    # The two names differ only when the 'Attic' component has been
    # stripped out of the canonical name.
    if canonical_name != filename:
        self.file_in_attic = 1
    else:
        self.file_in_attic = None

    file_stat = os.stat(filename)

    # Size of the file in bytes.
    self.file_size = file_stat[stat.ST_SIZE]

    # Whether or not the owner-executable bit is set.
    if file_stat[stat.ST_MODE] & stat.S_IXUSR:
        self.file_executable = 1
    else:
        self.file_executable = None

    # revision -> [timestamp, author, old-timestamp]
    self.rev_data = {}

    # revision -> the previous revision along the same line of
    # development.  For the first revision R on a branch, the
    # 'previous' revision is the one from which R sprouted.
    #
    # This cannot be worked out arithmetically from the revision
    # number (because of cvsadmin -o), hence the explicit map.
    self.prev_rev = {}

    # Essentially self.prev_rev with the values mapped in the other
    # direction, so that key -> value yields the next revision number.
    self.next_rev = {}

    # revision -> RCS state.  set_revision_info() needs the state of a
    # revision's predecessor to tell an add from a change, and by the
    # time it runs all of the file's revisions have been recorded
    # here.
    self.rev_state = {}

    # Branch number (like '1.7.2') -> branch name (like
    # 'Release_1_0_dev').
    self.branch_names = {}

    # RCS flags (used for keyword expansion).
    self.mode = None

    # Revision number (like '1.7') -> list of the names of the
    # branches sprouting from that revision, like
    # ['Release_1_0_dev', 'experimental_driver', ...].
    self.branchlist = {}

    # Like self.branchlist, but the values are lists of the tag names
    # that apply to the key revision.
    self.taglist = {}

    # If set, this is an RCS branch number.  rcsparse calls it the
    # "principal branch", but CVS and RCS call it the "default
    # branch", so that is the name used here, even though the rcsparse
    # API setter method is still 'set_principal_branch'.
    self.default_branch = None

    # If the RCS file no longer has a default branch but does have
    # vendor revisions, we make an educated guess that those revisions
    # *were* the head of the default branch until 1.2 was committed,
    # at which point the file's default branch became trunk.  This
    # records the date at which 1.2 was committed.
    self.first_non_vendor_revision_date = None
def set_principal_branch(self, branch):
    """Record BRANCH as this file's default branch (what the rcsparse
    API calls the 'principal branch')."""
    self.default_branch = branch
def set_expansion(self, mode):
    """Record MODE, the RCS flags used for keyword expansion."""
    self.mode = mode
def set_branch_name(self, branch_number, name):
    """Record that BRANCH_NUMBER is the branch number for branch NAME,
    and that NAME sprouts from the revision BRANCH_NUMBER is rooted
    at.

    BRANCH_NUMBER is an RCS branch number with an odd number of
    components, for example '1.7.2' (never '1.7.0.2').  If the branch
    number already has a name, a warning is written to stderr and
    NAME is ignored."""
    if branch_number in self.branch_names:
        sys.stderr.write("%s: in '%s':\n"
                         " branch '%s' already has name '%s',\n"
                         " cannot also have name '%s', ignoring the latter\n"
                         % (warning_prefix, self.fname, branch_number,
                            self.branch_names[branch_number], name))
        return

    self.branch_names[branch_number] = name
    # The branchlist is keyed on the revision the branch sprouts from,
    # which is the branch number minus its odd final component.
    sprout_rev = branch_number[:branch_number.rfind(".")]
    self.branchlist.setdefault(sprout_rev, []).append(name)
    self.symbol_db.register_branch_creation(name)
def rev_to_branch_name(self, revision):
    """Return the name of the branch on which REVISION lies.

    REVISION is a non-branch revision number with an even number of
    components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
    For the convenience of callers, REVISION can also be a trunk
    revision such as '1.2', in which case None is returned.  None is
    also returned when the branch number has no recorded name."""
    if trunk_rev.match(revision):
        return None
    # Dropping the last component leaves the branch number.
    branch_number = revision[:revision.rindex(".")]
    return self.branch_names.get(branch_number)
def add_cvs_branch(self, revision, branch_name):
    """Record the root revision and branch revision for BRANCH_NAME,
    based on REVISION.

    REVISION is a CVS branch number having an even number of
    components where the second-to-last is '0'.  For example, if it's
    '1.7.0.2', then record that BRANCH_NAME sprouts from 1.7 and has
    branch number 1.7.2."""
    tail_at = revision.rfind(".")
    # Everything before the final component, e.g. '1.7.0' ...
    head = revision[:tail_at]
    # ... minus its own last component, plus the original tail
    # (including its leading dot), e.g. '1.7' + '.2'.
    branch_rev = head[:head.rfind(".")] + revision[tail_at:]
    self.set_branch_name(branch_rev, branch_name)
def define_tag(self, name, revision):
    """Record a bidirectional mapping between symbolic NAME and
    REVISION.

    REVISION is an unprocessed revision number from the RCS file's
    header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
    The kind of symbolic name is determined by inspecting REVISION,
    and the name is recorded in the right places.  A NAME that does
    not look like a valid symbolic name is reported on stderr and
    ignored."""
    if not symbolic_name_re.match(name):
        sys.stderr.write("%s: in '%s':\n"
                         " '%s' is not a valid tag or branch name, ignoring\n"
                         % (warning_prefix, self.fname, name))
        return

    if branch_tag.match(revision):
        # A CVS-style branch number still needs its '0' removed.
        self.add_cvs_branch(revision, name)
        return

    if vendor_tag.match(revision):
        # Vendor branch numbers are already usable as they are.
        self.set_branch_name(revision, name)
        return

    # Anything else is an ordinary tag on REVISION.
    self.taglist.setdefault(revision, []).append(name)
    self.symbol_db.register_tag_creation(name)
def define_revision(self, revision, timestamp, author, state,
                    branches, next):
    """Record what the RCS file's header says about REVISION.

    TIMESTAMP, AUTHOR and STATE are stored for later passes.
    BRANCHES lists the revisions sprouting from REVISION, and NEXT is
    the RCS 'next' link.  Both are used to build self.prev_rev and
    self.next_rev.  The default-branch database and the symbol
    database are updated as a side effect."""
    # Remember the state of this revision for later calculations.
    self.rev_state[revision] = state

    # Kept as a list so that the timestamp can be adjusted in place.
    self.rev_data[revision] = [int(timestamp), author, None]

    # On trunk, the RCS 'next' revision number points to what humans
    # would call the 'previous' revision: 1.3's RCS 'next' is 1.2.
    #
    # On a branch, RCS 'next' really does point to what humans would
    # call the 'next' revision: 1.1.2.1's RCS 'next' is 1.1.2.2.
    #
    # In other words, in RCS 'next' always means "where to find the
    # next deltatext needed to retrieve this revision".
    #
    # We don't *want* RCS's behavior here, so we look at whether we
    # are on trunk or on a branch and fill in self.prev_rev
    # accordingly.
    #
    # For a branch revision the assignment is therefore reversed:
    # NEXT is mapped to REVISION rather than REVISION to NEXT.  Since
    # every revision in the file is visited before any of this data
    # is used, the reversed assignment
    #
    #   1. leaves REVISION without a 'prev' value in this call (it
    #      may have been given one by an earlier call), and
    #
    #   2. sets the 'prev' value of the revision numbered NEXT to
    #      REVISION, so that it is already in place by the time that
    #      revision is visited.
    on_trunk = trunk_rev.match(revision)
    if on_trunk:
        self.prev_rev[revision] = next
        self.next_rev[next] = revision
    elif next:
        self.prev_rev[next] = revision
        self.next_rev[revision] = next

    for sprout in branches:
        self.prev_rev[sprout] = revision

    # Ratchet up the highest vendor head revision, if necessary.
    if self.default_branch:
        if revision.startswith(self.default_branch + "."):
            # This revision is on the default branch, so it becomes
            # the new highest vendor head revision.
            rel_name = relative_name(self.cvsroot, self.fname)[:-2]
            self.default_branches_db[rel_name] = revision
    elif revision == '1.2':
        # No default branch, so make an educated guess: this is
        # probably when the file stopped having a default branch, so
        # make a note of the time.
        self.first_non_vendor_revision_date = timestamp
    elif vendor_revision.match(revision) and (
            (not self.first_non_vendor_revision_date)
            or (timestamp < self.first_non_vendor_revision_date)):
        # A vendor revision that was not committed after the file
        # lost its default branch: bump the maximum trunk vendor
        # revision in the permanent record.
        rel_name = relative_name(self.cvsroot, self.fname)[:-2]
        self.default_branches_db[rel_name] = revision

    if on_trunk:
        return

    # Check for unlabeled branches and record them.  All labeled
    # branches were collected when the symbolic name header was
    # parsed, but an unlabeled branch is first encountered here, so
    # its data has to be recorded now.
    branch_number = revision[:revision.rindex(".")]
    if branch_number not in self.branch_names:
        self.set_branch_name(branch_number, "unlabeled-" + branch_number)

    # Register the commit on this non-trunk branch.
    self.symbol_db.register_branch_commit(self.branch_names[branch_number])
def tree_completed(self):
    """The revision tree has been parsed.  Analyze it for consistency.

    Timestamps in self.rev_data are adjusted in place until every
    revision is strictly later than its predecessor."""
    # The algorithm relies on revision timestamps increasing
    # monotonically: rev 1.34 has to occur in time before rev 1.35.
    # If 1.35 were inserted *first* (because of the time-sorting) and
    # 1.34 afterwards, we'd be screwed.
    #
    # The check simply walks every recorded 'previous' link and
    # verifies that the previous revision is dated before the
    # revision that points at it.
    #
    # Whenever some nodes had to be resynced the scan is restarted,
    # so keep looping for as long as that happens.
    while 1:
        for current, prev in self.prev_rev.items():
            if not prev:
                # No previous revision exists (i.e. this is the
                # initial revision).
                continue
            cur_time = self.rev_data[current][0]
            prev_time = self.rev_data[prev][0]
            if prev_time < cur_time:
                # Already in order.
                continue

            # The previous revision occurred at the same time as, or
            # later than, the current one.  Shove it back in time,
            # along with any revisions before it that then need to
            # shift as well.
            while prev_time >= cur_time:
                self.rev_data[prev][0] = cur_time - 1  # new timestamp
                self.rev_data[prev][2] = prev_time     # old timestamp

                msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
                      % (relative_name(self.cvsroot, self.fname),
                         prev, time.ctime(prev_time),
                         cur_time - 1 - prev_time)
                Log().write(LOG_VERBOSE, msg)

                current = prev
                prev = self.prev_rev[current]
                if not prev:
                    break
                # Same value as self.rev_data[current][0] now holds.
                cur_time = cur_time - 1
                prev_time = self.rev_data[prev][0]

            # Something was resynced: leave the for-loop and rescan.
            break
        else:
            # The for-loop ran to completion, so nothing was resynced.
            return
def set_revision_info(self, revision, log, text):
    """Emit the CVSRevision record for REVISION.

    LOG is the revision's log message and TEXT its deltatext.  A line
    is appended to the revs file, and the resync file and the
    metadata and default-branches databases are updated as needed."""
    timestamp, author, old_ts = self.rev_data[revision]
    digest = sha.new(log + '\0' + author).hexdigest()
    if old_ts:
        # The timestamp of this revision was changed.  Log it so that
        # other files' revisions made at this time and with this log
        # message can be resynchronized later.
        self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

    # "...Give back one kadam to honor the Hebrew God whose Ark this
    # is."  -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
    #
    # If revision 1.1 looks like it was created by 'cvs add' rather
    # than 'cvs import', the file probably never had a default
    # branch, so its record in the default branches db is removed
    # retroactively.  The test is that the log message CVS uses for
    # 1.1 in imports is "Initial revision\n" with no period.
    if revision == '1.1' and log != 'Initial revision\n':
        rel_name = relative_name(self.cvsroot, self.fname)[:-2]
        if self.default_branches_db.has_key(rel_name):
            del self.default_branches_db[rel_name]

    # Telling an add from a change from a deletion:
    #
    #   - a revision whose RCS state is 'dead' is a delete;
    #   - any other revision is an add if it has no previous
    #     revision, or if its previous revision's state is 'dead';
    #   - everything else is a change.
    prev = self.prev_rev.get(revision, None)
    if self.rev_state[revision] == 'dead':
        op = OP_DELETE
    elif prev is None or self.rev_state[prev] == 'dead':
        op = OP_ADD
    else:
        op = OP_CHANGE

    if not text:
        deltatext_code = DELTATEXT_EMPTY
    else:
        deltatext_code = DELTATEXT_NONEMPTY

    c_rev = CVSRevision(Ctx(), timestamp, digest, op,
                        self.prev_rev[revision], revision,
                        self.next_rev.get(revision),
                        self.file_in_attic, self.file_executable,
                        self.file_size,
                        deltatext_code, self.fname,
                        self.mode, self.rev_to_branch_name(revision),
                        self.taglist.get(revision, []),
                        self.branchlist.get(revision, []))
    self.revs.write(str(c_rev) + "\n")

    # Author and log are stored once per digest.
    if not self.metadata_db.has_key(digest):
        self.metadata_db[digest] = (author, log)
def parse_completed(self):
    """Register every tag and branch of this file with its parent
    branch in the symbol database, then count the file as done."""
    # Tags are handled first, then branches.  Symbols rooted at a
    # revision for which rev_to_branch_name() gives None (a trunk
    # revision, for instance) have no parent branch to register with.
    for table in (self.taglist, self.branchlist):
        for revision, symbols in table.items():
            for symbol in symbols:
                parent = self.rev_to_branch_name(revision)
                if parent is None:
                    continue
                self.symbol_db.register_branch_blocker(parent, symbol)

    self.num_files = self.num_files + 1
def write_symbol_db(self):
    """Ask the symbol database to store itself (see its write())."""
    self.symbol_db.write()
1246 class SymbolingsLogger:
1247 """Manage the file that contains lines for symbol openings and
1248 closings.
1250 This data will later be used to determine valid SVNRevision ranges
1251 from which a file can be copied when creating a branch or tag in
1252 Subversion. Do this by finding "Openings" and "Closings" for each
1253 file copied onto a branch or tag.
1255 An "Opening" is the CVSRevision from which a given branch/tag
1256 sprouts on a path.
1258 The "Closing" for that branch/tag and path is the next CVSRevision
1259 on the same line of development as the opening.
1261 For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
1262 obviously sprouts from revision 1.2. Therefore, 1.2 is the opening
1263 for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
1264 'foo.c'. Note that there may be many revisions chronologically
1265 between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
1266 perhaps even including on branch BEE itself. But 1.3 is the next
1267 revision *on the same line* as 1.2, that is why it is the closing
1268 revision for those symbolic names of which 1.2 is the opening.
1270 The reason for doing all this hullabaloo is to make branch and tag
1271 creation as efficient as possible by minimizing the number of copies
1272 and deletes per creation. For example, revisions 1.2 and 1.3 of
1273 foo.c might correspond to revisions 17 and 30 in Subversion. That
1274 means that when creating branch BEE, there is some motivation to do
1275 the copy from one of 17-30. Now if there were another file,
1276 'bar.c', whose opening and closing CVSRevisions for BEE corresponded
1277 to revisions 24 and 39 in Subversion, we would know that the ideal
1278 thing would be to copy the branch from somewhere between 24 and 29,
1279 inclusive.
def __init__(self):
    """Open the symbolings file and the temporary closings file for
    writing, and register both with Cleanup()."""
    self.symbolings = open(SYMBOL_OPENINGS_CLOSINGS, 'w')
    Cleanup().register(SYMBOL_OPENINGS_CLOSINGS, pass6)

    self.closings = open(SYMBOL_CLOSINGS_TMP, 'w')
    Cleanup().register(SYMBOL_CLOSINGS_TMP, pass5)

    # Maps a Subversion repository *source* path for which an
    # 'opening' has been encountered to the list of symbolic names
    # that the path has opened.  Only paths whose corresponding
    # CVSRevision is a default branch revision should end up in here.
    self.open_paths_with_default_branches = {}
def log_revision(self, c_rev, svn_revnum):
    """Log any openings found in C_REV, and if C_REV.next_rev is not
    None, a closing.

    The opening uses SVN_REVNUM, but the closing (if any) will have
    its revnum determined later, in close()."""
    for raw_name in c_rev.tags + c_rev.branches:
        name = _clean_symbolic_name(raw_name)
        self._note_default_branch_opening(c_rev, name)

        # A deleted revision cannot be the source of a copy.
        if c_rev.op != OP_DELETE:
            self._log(name, svn_revnum, c_rev.svn_path, OPENING)

        if c_rev.next_rev is None:
            continue

        # The next revision on this line of development closes this
        # source revision.  Its svn_revnum is not known yet, so just
        # note its key for later processing.
        closing_key = c_rev.unique_key(c_rev.next_rev)
        self.closings.write('%s %s\n' % (name, closing_key))
1311 def _log(self, name, svn_revnum, svn_path, type):
1312 """Write out a single line to the symbol_openings_closings file
1313 representing that svn_revnum of svn_path is either the opening or
1314 closing (TYPE) of NAME (a symbolic name).
1316 TYPE should only be one of the following global constants:
1317 OPENING or CLOSING."""
1318 # 8 places gives us 999,999,999 SVN revs. That *should* be enough.
1319 self.symbolings.write('%s %.8d %s %s\n' % (name, svn_revnum,
1320 type, svn_path))
def close(self):
    """Iterate through the closings file, look up the svn_revnum for
    each closing CVSRevision, and write a proper line out to the
    symbolings file.  Both files are closed by this method."""
    # Needed to find the c_rev.svn_path that belongs to a rev_key.
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    # Finish writing the closings before reading them back.
    self.closings.close()
    for line in fileinput.FileInput(SYMBOL_CLOSINGS_TMP):
        fields = line.rstrip().split(" ", 1)
        (name, rev_key) = fields
        svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

        closing_rev = cvs_revs_db.get_revision(rev_key)
        self._log(name, svn_revnum, closing_rev.svn_path, CLOSING)

    self.symbolings.close()
1339 def _note_default_branch_opening(self, c_rev, symbolic_name):
1340 """If C_REV is a default branch revision, log C_REV.svn_trunk_path
1341 as an opening for SYMBOLIC_NAME."""
1342 path = c_rev.svn_trunk_path
1343 if not self.open_paths_with_default_branches.has_key(path):
1344 self.open_paths_with_default_branches[path] = [ ]
1345 self.open_paths_with_default_branches[path].append(symbolic_name)
def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.svn_trunk_path, then log each name in
    self.open_paths_with_default_branches[C_REV.svn_trunk_path] as a
    closing with SVN_REVNUM as the closing revision number, and
    forget the path.  Otherwise do nothing."""
    path = c_rev.svn_trunk_path
    if path not in self.open_paths_with_default_branches:
        return

    # Log each symbol as a closing ...
    for name in self.open_paths_with_default_branches[path]:
        self._log(name, svn_revnum, path, CLOSING)
    # ... and drop them from the openings list, as we're done with
    # them.
    del self.open_paths_with_default_branches[path]
1361 class PersistenceManager:
1362 """The PersistenceManager allows us to effectively store SVNCommits
1363 to disk and retrieve them later using only their subversion revision
1364 number as the key. It also returns the subversion revision number
1365 for a given CVSRevision's unique key.
1367 All information pertinent to each SVNCommit is stored in a series of
1368 on-disk databases so that SVNCommits can be retrieved on-demand.
1370 MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
1371 In 'new' mode, PersistenceManager will initialize a new set of on-disk
1372 databases and be fully-featured.
1373 In 'read' mode, PersistenceManager will open existing on-disk databases
1374 and the set_* methods will be unavailable."""
def __init__(self, mode):
    """Open the on-disk databases in MODE, which must be DB_OPEN_NEW
    or DB_OPEN_READ; any other value raises RuntimeError.

    The metadata and CVS revision databases (and, unless this is a
    trunk-only conversion, the tags database) are always opened for
    reading, whatever MODE is."""
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
        raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

    # Databases opened in the requested mode.
    self.svn2cvs_db = Database(SVN_REVNUMS_TO_CVS_REVS, mode)
    Cleanup().register(SVN_REVNUMS_TO_CVS_REVS, pass8)

    self.cvs2svn_db = Database(CVS_REVS_TO_SVN_REVNUMS, mode)
    Cleanup().register(CVS_REVS_TO_SVN_REVNUMS, pass8)

    self.svn_commit_names_dates = Database(SVN_COMMIT_NAMES_DATES, mode)
    Cleanup().register(SVN_COMMIT_NAMES_DATES, pass8)

    # Databases that already exist and are only read from.
    self.svn_commit_metadata = Database(METADATA_DB, DB_OPEN_READ)
    self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
    ###PERF kff Elsewhere there are comments about sucking the tags db
    ### into memory.  That seems like a good idea.
    if not Ctx().trunk_only:
        self.tags_db = TagsDatabase(DB_OPEN_READ)

    self.motivating_revnums = Database(MOTIVATING_REVNUMS, mode)
    Cleanup().register(MOTIVATING_REVNUMS, pass8)

    # "branch_name" -> svn_revnum in which the branch was last
    # filled.  CVSCommit._pre_commit uses this to avoid creating a
    # fill revision which would have nothing to do.
    self.last_filled = {}
def total_revs(self):
    """Return the total number of Subversion revisions, that is, the
    number of entries in the svn-revnum -> cvs-revs database."""
    revision_count = self.svn2cvs_db.len()
    return revision_count
def get_svn_revnum(self, cvs_rev_unique_key):
    """Return the Subversion revision number in which
    CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
    is no mapping for CVS_REV_UNIQUE_KEY."""
    stored = self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM)
    return int(stored)
def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None.

    The author and log message are taken from the metadata of the
    first CVSRevision of the commit.  Unless this is a trunk-only
    conversion, the symbolic name, date, tag flag and motivating
    revnum recorded for SVN_REVNUM are restored as well.

    This method can throw SVNCommitInternalInconsistencyError.
    """
    svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
    c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
    # Identity test: None is the "no such revision" marker, and must
    # not be confused with a stored value that merely compares equal.
    if c_rev_keys is None:
        return None

    digest = None
    for key in c_rev_keys:
        c_rev = self.cvs_revisions.get_revision(key)
        svn_commit.add_revision(c_rev)
        # Set the author and log message for this commit by using
        # CVSRevision metadata, but only if we haven't done so
        # already.
        if digest is None:
            digest = c_rev.digest
            author, log_msg = self.svn_commit_metadata[digest]
            svn_commit.set_author(author)
            svn_commit.set_log_msg(log_msg)

    # A trunk-only conversion has no symbolic names to restore (and
    # no tags database was opened), so there is no more work to do.
    if Ctx().trunk_only:
        return svn_commit

    name, date = self._get_name_and_date(svn_revnum)
    if name:
        svn_commit.set_symbolic_name(name)
        svn_commit.set_date(date)
        if self.tags_db.has_key(name):
            svn_commit.is_tag = 1

    motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
    if motivating_revnum:
        svn_commit.set_motivating_revnum(int(motivating_revnum))
        svn_commit.set_date(date)

    if len(svn_commit.cvs_revs) and name:
        # The message spans two lines; it is assembled from two
        # literals so that its text does not depend on how this
        # source file happens to be indented.
        msg = ("An SVNCommit cannot have cvs_revisions *and* a\n"
               "corresponding symbolic name ('%s') to fill.") % name
        raise SVNCommit.SVNCommitInternalInconsistencyError(msg)

    return svn_commit
def set_cvs_revs(self, svn_revnum, cvs_revs):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS.

    Raises RuntimeError if this PersistenceManager was opened in
    DB_OPEN_READ mode."""
    if self.mode == DB_OPEN_READ:
        raise RuntimeError(
            'Write operation attempted on read-only PersistenceManager')

    for c_rev in cvs_revs:
        Log().write(LOG_VERBOSE, " ", c_rev.unique_key())

    # Forward direction: revnum -> list of unique keys.
    unique_keys = []
    for c_rev in cvs_revs:
        unique_keys.append(c_rev.unique_key())
    self.svn2cvs_db[str(svn_revnum)] = unique_keys

    # Reverse direction: each unique key -> revnum.
    for c_rev in cvs_revs:
        self.cvs2svn_db[c_rev.unique_key()] = svn_revnum
def set_name_and_date(self, svn_revnum, name, date):
    """Associate symbolic name NAME and DATE with SVN_REVNUM, and note
    SVN_REVNUM as the revision in which NAME was last filled.

    Raises RuntimeError if this PersistenceManager was opened in
    DB_OPEN_READ mode."""
    if self.mode == DB_OPEN_READ:
        raise RuntimeError(
            'Write operation attempted on read-only PersistenceManager')
    revnum_key = str(svn_revnum)
    self.svn_commit_names_dates[revnum_key] = (name, date)
    self.last_filled[name] = svn_revnum
1476 def _get_name_and_date(self, svn_revnum):
1477 """Return a tuple containing the symbolic name and date associated
1478 with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
1479 associated with it."""
1480 return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))
def set_motivating_revnum(self, svn_revnum, motivating_revnum):
    """Store MOTIVATING_REVNUM as the value of SVN_REVNUM.  Both are
    stored in their string form.

    Raises RuntimeError if this PersistenceManager was opened in
    DB_OPEN_READ mode."""
    if self.mode == DB_OPEN_READ:
        raise RuntimeError(
            'Write operation attempted on read-only PersistenceManager')
    revnum_key = str(svn_revnum)
    self.motivating_revnums[revnum_key] = str(motivating_revnum)
1490 class CVSCommit:
1491 """Each instance of this class contains a number of CVS Revisions
1492 that correspond to one or more Subversion Commits. After all CVS
1493 Revisions are added to the grouping, calling process_revisions will
1494 generate a Subversion Commit (or Commits) for the set of CVS
1495 Revisions in the grouping."""
def __init__(self, digest, author, log):
    """Create an empty grouping for the CVS revisions that share
    DIGEST, AUTHOR and LOG."""
    self.digest = digest
    self.author = author
    self.log = log

    # Symbolic names for which the last source revision has already
    # been seen and for which the CVSRevisionAggregator has already
    # generated a fill SVNCommit.  See self.process_revisions().
    self.done_symbols = []

    self.files = {}

    # Lists of CVSRevisions.
    self.changes = []
    self.deletes = []

    # t_min starts out higher than any incoming time T, and t_max
    # lower than any incoming T.  The first T therefore pushes t_min
    # down to T and t_max up to T without any special-casing, and
    # later times ratchet the two outward as appropriate.
    self.t_min = long(1) << 32
    self.t_max = 0

    # This will be set to the SVNCommit that occurs in self._commit.
    self.motivating_commit = None

    # All non-primary commits motivated by the main commit.  They
    # are gathered so that their dates can be set to the date of the
    # primary commit.
    self.secondary_commits = []

    # State for handling default branches.
    #
    # Here is a tempting, but ultimately nugatory, bit of logic,
    # shared so that the less attractive, but refreshingly
    # non-nugatory, logic which follows it may be appreciated:
    #
    # If some of the commits in this txn happened on a non-trunk
    # default branch, then those files have to be copied into trunk
    # manually after being changed on the branch (because the RCS
    # "default branch" appears as head, i.e., trunk, in practice).
    # As long as those copies don't overwrite any trunk paths that
    # were also changed in this commit, the copies can be done in
    # the same revision, because they won't cover changes that don't
    # appear anywhere/anywhen else.  However, if some of the trunk
    # dst paths *did* change in this commit, then immediately
    # copying the branch changes would lose those trunk mods
    # forever.  So in that case, at least that copy needs its own
    # revision.  And for simplicity's sake, if the new revision is
    # created for even one file, then all such copies are done
    # together in the new revision.
    #
    # Doesn't that sound nice?
    #
    # Unfortunately, Subversion doesn't support copies with sources
    # in the current txn.  All copies must be based in committed
    # revisions.  Therefore, the above-described new revision is
    # generated unconditionally.
    #
    # This is a list of c_revs.  A c_rev is appended for each
    # default branch commit that will need to be copied to trunk (or
    # deleted from trunk) in some generated revision following the
    # "regular" revision.
    self.default_branch_cvs_revisions = []
def __cmp__(self, other):
    """Order commits by t_max; ties are broken using t_min and, as a
    last resort, the digest."""
    result = cmp(self.t_max, other.t_max)
    if result:
        return result
    result = cmp(self.t_min, other.t_min)
    if result:
        return result
    return cmp(self.digest, other.digest)
def has_file(self, fname):
    """Return true if a revision of FNAME has been added to this
    commit."""
    return fname in self.files
def revisions(self):
    """Return a new list holding every CVSRevision in this commit:
    the changes first, followed by the deletes."""
    combined = self.changes + []
    combined = combined + self.deletes
    return combined
def opens_symbolic_name(self, name):
    """Return 1 if any CVSRevision in this commit reports that it
    opens the symbolic name NAME, and 0 otherwise.

    The revisions are asked in the order given by self.revisions(),
    stopping at the first one that answers true."""
    opened = 0
    for c_rev in self.revisions():
        if c_rev.opens_symbolic_name(name):
            opened = 1
            break
    return opened
def add_revision(self, c_rev):
    """Add C_REV to this commit: widen the commit's time range to
    include it, file it under deletes or changes according to its
    op, and note its file name."""
    # Record the time range of this commit.
    #
    # ### ISSUE: It's possible, though unlikely, that the time range
    # of a commit could get gradually expanded to be arbitrarily
    # longer than COMMIT_THRESHOLD.  I'm not sure this is a huge
    # problem, and anyway deciding where to break it up would be a
    # judgement call.  For now, we just print a warning in commit()
    # if this happens.
    self.t_min = min(self.t_min, c_rev.timestamp)
    self.t_max = max(self.t_max, c_rev.timestamp)

    if c_rev.op == OP_DELETE:
        bucket = self.deletes
    else:
        # OP_CHANGE or OP_ADD
        bucket = self.changes
    bucket.append(c_rev)

    self.files[c_rev.fname] = 1
def _pre_commit(self):
  """Generate any SVNCommits that must exist before the main commit.

  For each revision in this CVSCommit that is on a branch, decide
  whether that branch has to be filled before the revision can be
  committed.  When it does, create an SVNCommit carrying the branch
  as its symbolic name and append it to self.secondary_commits.  At
  most one such SVNCommit is created per branch."""

  # There may be multiple c_revs in this commit that would cause
  # branch B to be filled, but we only want to fill B once.  On the
  # other hand, there might be multiple branches committed on in
  # this commit.  Whatever the case, we should count exactly one
  # commit per branch, because we only fill a branch once per
  # CVSCommit.  This list tracks which branches we've already
  # counted.
  accounted_for_sym_names = [ ]

  def fill_needed(c_rev, pm):
    """Return 1 if this is the first commit on a new branch (for
    this file) and we need to fill the branch; else return 0
    (meaning that some other file's first commit on the branch has
    already done the fill for us).

    If C_REV.op is OP_ADD, only return 1 if the branch that this
    commit is on has no last filled revision.

    PM is a PersistenceManager to query.
    """

    # Different '.' counts indicate that c_rev is now on a different
    # line of development (and may need a fill)
    if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
      # Subversion revision in which the predecessor CVS revision
      # was committed.
      svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
      # It should be the case that when we have a file F that
      # is added on branch B (thus, F on trunk is in state
      # 'dead'), we generate an SVNCommit to fill B iff the branch
      # has never been filled before.
      #
      # If this c_rev.op == OP_ADD, *and* the branch has never
      # been filled before, then fill it now.  Otherwise, no need to
      # fill it.
      if c_rev.op == OP_ADD:
        if pm.last_filled.get(c_rev.branch_name, None) is None:
          return 1
      else:
        # A fill is needed only when the predecessor was committed
        # after the branch's last recorded fill (0 if never filled).
        if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
          return 1
    return 0

  for c_rev in self.changes + self.deletes:
    # If a commit is on a branch, we must ensure that the branch
    # path being committed exists (in HEAD of the Subversion
    # repository).  If it doesn't exist, we will need to fill the
    # branch.  After the fill, the path on which we're committing
    # will exist.
    #
    # The tests are ordered so that fill_needed(), which queries the
    # persistence manager, only runs for branches that have not
    # already been ruled out.
    if c_rev.branch_name \
       and c_rev.branch_name not in accounted_for_sym_names \
       and c_rev.branch_name not in self.done_symbols \
       and fill_needed(c_rev, Ctx()._persistence_manager):
      svn_commit = SVNCommit("pre-commit symbolic name '%s'"
                             % c_rev.branch_name)
      svn_commit.set_symbolic_name(c_rev.branch_name)
      self.secondary_commits.append(svn_commit)
      accounted_for_sym_names.append(c_rev.branch_name)
def _commit(self):
  """Generate the primary SVNCommit that corresponds to this
  CVSCommit.

  The new SVNCommit is stored in self.motivating_commit.  Revisions
  that are default branch revisions are additionally collected in
  self.default_branch_cvs_revisions.  The SVNCommit is flushed only
  if at least one CVSRevision was added to it; otherwise the
  SVNCommit revision counter is rolled back.  Unless this is a
  trunk-only conversion, every revision of this CVSCommit is then
  reported to the symbolings logger."""
  # Generate an SVNCommit unconditionally.  Even if the only change
  # in this CVSCommit is a deletion of an already-deleted file (that
  # is, a CVS revision in state 'dead' whose predecessor was also in
  # state 'dead'), the conversion will still generate a Subversion
  # revision containing the log message for the second dead
  # revision, because we don't want to lose that information.
  svn_commit = SVNCommit("commit")
  self.motivating_commit = svn_commit

  for c_rev in self.changes:
    svn_commit.add_revision(c_rev)
    # Only make a change if we need to.  When 1.1.1.1 has an empty
    # deltatext, the explanation is almost always that we're looking
    # at an imported file whose 1.1 and 1.1.1.1 are identical.  On
    # such imports, CVS creates an RCS file where 1.1 has the
    # content, and 1.1.1.1 has an empty deltatext, i.e, the same
    # content as 1.1.  There's no reason to reflect this non-change
    # in the repository, so we want to do nothing in this case.  (If
    # we were really paranoid, we could make sure 1.1's log message
    # is the CVS-generated "Initial revision\n", but I think the
    # conditions below are strict enough.)
    if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
            and (c_rev.rev == "1.1.1.1")):
      if c_rev.is_default_branch_revision():
        self.default_branch_cvs_revisions.append(c_rev)

  for c_rev in self.deletes:
    # When a file is added on a branch, CVS not only adds the file
    # on the branch, but generates a trunk revision (typically
    # 1.1) for that file in state 'dead'.  We only want to add
    # this revision if the log message is not the standard cvs
    # fabricated log message.
    if c_rev.prev_rev is None:
      # c_rev.branches may be empty if the originating branch
      # has been excluded.
      if not c_rev.branches:
        continue
      cvs_generated_msg = ('file %s was initially added on branch %s.\n'
                           % (c_rev.filename(),
                              c_rev.branches[0]))
      author, log_msg = \
          Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
      # Skip the placeholder revision that CVS fabricated itself.
      if log_msg == cvs_generated_msg:
        continue

    svn_commit.add_revision(c_rev)
    if c_rev.is_default_branch_revision():
      self.default_branch_cvs_revisions.append(c_rev)

  # There is a slight chance that we didn't actually register any
  # CVSRevisions with our SVNCommit (see loop over self.deletes
  # above), so if we have no CVSRevisions, we don't flush the
  # svn_commit to disk and roll back our revnum.
  if len(svn_commit.cvs_revs) > 0:
    svn_commit.flush()
  else:
    # We will not be flushing this SVNCommit, so rollback the
    # SVNCommit revision counter.
    SVNCommit.revnum = SVNCommit.revnum - 1

  # Note that all revisions are logged here, including any deletes
  # that were skipped above.
  if not Ctx().trunk_only:
    for c_rev in self.revisions():
      Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
1732 def _post_commit(self):
1733 """Generates any SVNCommits that we can perform now that _commit
1734 has happened. That is, handle non-trunk default branches.
1735 Sometimes an RCS file has a non-trunk default branch, so a commit
1736 on that default branch would be visible in a default CVS checkout
1737 of HEAD. If we don't copy that commit over to Subversion's trunk,
1738 then there will be no Subversion tree which corresponds to that
1739 CVS checkout. Of course, in order to copy the path over, we may
1740 first need to delete the existing trunk there. """
1742 # Only generate a commit if we have default branch revs
1743 if len(self.default_branch_cvs_revisions):
1744 # Generate an SVNCommit for all of our default branch c_revs.
1745 svn_commit = SVNCommit("post-commit default branch(es)")
1746 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
1747 for c_rev in self.default_branch_cvs_revisions:
1748 svn_commit.add_revision(c_rev)
1749 Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
1750 svn_commit.revnum)
1751 self.secondary_commits.append(svn_commit)
def process_revisions(self, done_symbols):
  """Turn the CVSRevisions held by this instance into one or more
  SVNCommits.  Fill SVNCommits are generated only for symbols that
  are not in DONE_SYMBOLS (this avoids unnecessary fills).

  Returns the primary SVNCommit for this CVSCommit, i.e. the commit
  that motivated any other SVNCommits generated here."""
  self.done_symbols = done_symbols
  duration = self.t_max - self.t_min + 1

  Log().write(LOG_VERBOSE, '-' * 60)
  Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
  if duration != 1:
    Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
    Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
                % (time.ctime(self.t_max), duration))
  else:
    Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
                % time.ctime(self.t_max))

  if duration > COMMIT_THRESHOLD + 1:
    Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
                % (warning_prefix, COMMIT_THRESHOLD))

  if not Ctx().trunk_only:
    self._pre_commit()
    self._commit()
    self._post_commit()

    # Secondary commits take their date from the primary commit.
    for secondary in self.secondary_commits:
      secondary.set_date(self.motivating_commit.get_date())
      secondary.flush()
  else:
    # A trunk-only conversion needs nothing but the primary commit.
    self._commit()

  return self.motivating_commit
class SVNCommit:
  """One commit to the Subversion repository.  An SVNCommit is one of
  three kinds:

  1. A commit of one or more CVSRevisions (cannot fill a symbolic
     name).

  2. A commit that creates or fills a symbolic name (cannot commit
     CVSRevisions).

  3. A commit that updates trunk to reflect the contents of a
     particular branch (this is to handle RCS default branches)."""

  # The revision number that the next new SVNCommit will receive.
  # Counting starts at 2 because SVNRepositoryMirror uses the first
  # commit to create trunk, tags, and branches.
  revnum = 2

  class SVNCommitInternalInconsistencyError(Exception):
    """Raised when an impossible state is encountered in the
    SVNCommit databases."""
    pass

  def __init__(self, description="", revnum=None, cvs_revs=None):
    """Create an SVNCommit.  DESCRIPTION is for debugging only.

    If REVNUM is given, the SVNCommit corresponds to that revision
    number; otherwise it takes the next number from the class-wide
    counter SVNCommit.revnum, which is then advanced.  If CVS_REVS is
    given, it must be the exact set of CVSRevisions for REVNUM.

    It is an error to pass CVS_REVS without REVNUM, but REVNUM may be
    passed without CVS_REVS, with revisions then added one at a time
    through add_revision()."""
    self._description = description

    # Revprop metadata for this commit.
    #
    # These starting values are placeholders.  At least the log and
    # the date should have changed by the time they are used.
    #
    # They are private because they have to be handed out encoded in
    # UTF8 while callers are free to set them in any encoding.
    # Hence they are set through accessor methods and read, in
    # dictionary form, through self.get_revprops().
    self._author = Ctx().username
    self._log_msg = "This log message means an SVNCommit was used too soon."
    self._max_date = 0 # Latest date seen so far.

    if not cvs_revs:
      cvs_revs = []
    self.cvs_revs = cvs_revs

    if not revnum:
      revnum = SVNCommit.revnum
      SVNCommit.revnum = revnum + 1
    self.revnum = revnum

    # The symbolic name that is filled in this SVNCommit, if any.
    self.symbolic_name = None

    # For a default branch synchronization commit this holds the
    # subversion revision number of the *primary* commit where the
    # default branch changes actually happened.  None otherwise.
    #
    # Several synchronization commits may refer to the same
    # motivating commit revision number, and a single synchronization
    # commit may contain CVSRevisions on several different default
    # branches.
    self.motivating_revnum = None

    # True only if this commit is a fill of a symbolic name that is a
    # tag; None in all other cases.
    self.is_tag = None

  def set_symbolic_name(self, name):
    "Set self.symbolic_name to the cleaned-up form of NAME."
    self.symbolic_name = _clean_symbolic_name(name)

  def set_motivating_revnum(self, revnum):
    "Set self.motivating_revnum to REVNUM."
    self.motivating_revnum = revnum

  def set_author(self, author):
    """Set the author of this SVNCommit to AUTHOR (a locally-encoded
    string).  This is the only way to set an SVNCommit's author."""
    self._author = author

  def set_log_msg(self, msg):
    """Set the log message of this SVNCommit to MSG (a locally-encoded
    string).  This is the only way to set an SVNCommit's log message."""
    self._log_msg = msg

  def set_date(self, date):
    """Set the date of this SVNCommit to DATE (an integer).

    self.add_revision() also updates the date, based on the
    CVSRevision it is given, so calling this may be unnecessary, and
    a value set here may be overwritten by a later call to
    self.add_revision()."""
    self._max_date = date

  def get_date(self):
    """Return the date of this SVNCommit as an integer."""
    return self._max_date

  def get_revprops(self):
    """Return the Subversion revprops for this SVNCommit as a
    dictionary with the keys 'svn:author', 'svn:log' and 'svn:date'.

    Author and log message are converted from the configured encoding
    to UTF8.  If that conversion raises UnicodeError, a warning is
    logged and the unconverted values are returned instead."""
    date = format_date(self._max_date)
    try:
      ### FIXME: The 'replace' behavior should be an option, like
      ### --encoding is.
      utf8_author = None
      if self._author is not None:
        utf8_author = unicode(self._author, Ctx().encoding,
                              'replace').encode('utf8')
      utf8_log = unicode(self.get_log_msg(), Ctx().encoding,
                         'replace').encode('utf8')
      revprops = { 'svn:author' : utf8_author,
                   'svn:log' : utf8_log,
                   'svn:date' : date }
      return revprops
    except UnicodeError:
      Log().write(LOG_WARN, '%s: problem encoding author or log message:'
                  % warning_prefix)
      Log().write(LOG_WARN, " author: '%s'" % self._author)
      Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
      Log().write(LOG_WARN, " date: '%s'" % date)
      Log().write(LOG_WARN, "(subversion rev %s) Related files:" % self.revnum)
      for c_rev in self.cvs_revs:
        Log().write(LOG_WARN, " ", c_rev.fname)

      Log().write(LOG_WARN, "Consider rerunning with (for example)",
                  "'--encoding=latin1'.\n")
      # Falling back to the original data (in an unknown encoding) is
      # better than either 1) quitting or 2) recording nothing at all.
      fallback = { 'svn:author' : self._author,
                   'svn:log' : self.get_log_msg(),
                   'svn:date' : date }
      return fallback

  def add_revision(self, cvs_rev):
    """Append CVS_REV to this commit's revisions and move the commit
    date forward if CVS_REV's timestamp is later than it."""
    self.cvs_revs.append(cvs_rev)
    stamp = cvs_rev.timestamp
    if stamp > self._max_date:
      self._max_date = stamp

  def _is_primary_commit(self):
    """Return true if this is a primary SVNCommit (one with neither a
    symbolic name nor a motivating revnum), false otherwise."""
    return (not self.symbolic_name) and (not self.motivating_revnum)

  def flush(self):
    """Log the creation of this commit and store its data through the
    persistence manager: always its CVSRevisions, its motivating
    revnum when there is one, and its symbolic name and date when it
    is not a primary commit."""
    Log().write(LOG_NORMAL, "Creating Subversion commit %d (%s)"
                % (self.revnum, self._description))
    pm = Ctx()._persistence_manager
    pm.set_cvs_revs(self.revnum, self.cvs_revs)

    if self.motivating_revnum is not None:
      pm.set_motivating_revnum(self.revnum, self.motivating_revnum)

    # Only non-primary commits store a date and/or a symbolic name.
    if not self._is_primary_commit():
      pm.set_name_and_date(self.revnum, self.symbolic_name, self._max_date)

  def __str__(self):
    """Return a human-readable description of this SVNCommit.  The
    description is not intended to be machine-parseable (although
    nobody is going to stop you if you try!)"""
    if self.symbolic_name:
      name_line = " symbolic name: " + self.symbolic_name + "\n"
    else:
      name_line = " NO symbolic name\n"
    pieces = [ "SVNCommit #: " + str(self.revnum) + "\n",
               name_line,
               " debug description: " + self._description + "\n",
               " cvs_revs:\n" ]
    for c_rev in self.cvs_revs:
      pieces.append(" " + c_rev.unique_key() + "\n")
    return "".join(pieces)

  def get_log_msg(self):
    """Return the actual log message for a primary commit, or the
    appropriate manufactured log message for a secondary commit."""
    if self.symbolic_name is not None:
      return self._log_msg_for_symbolic_name_commit()
    if self.motivating_revnum is not None:
      return self._log_msg_for_default_branch_commit()
    return self._log_msg

  def _log_msg_for_symbolic_name_commit(self):
    """Build the log message for a manufactured commit that fills
    self.symbolic_name.  The message is worded for a tag if
    self.is_tag is true and for a branch otherwise."""
    if self.is_tag:
      kind = 'tag'
    else:
      kind = 'branch'

    # In Python 2.2.3, we could use textwrap.fill().  Oh well :-).
    if len(self.symbolic_name) >= 13:
      separator = '\n'
    else:
      separator = ' '

    template = "This commit was manufactured by cvs2svn to create %s%s'%s'."
    return template % (kind, separator, self.symbolic_name)

  def _log_msg_for_default_branch_commit(self):
    """Build the log message for a manufactured commit that
    synchronizes a non-trunk default branch with trunk."""
    template = ('This commit was generated by cvs2svn to compensate for '
                'changes in r%d,\n'
                'which included commits to RCS files with non-trunk default '
                'branches.\n')
    return template % self.motivating_revnum
class CVSRevisionAggregator:
  """This class groups CVSRevisions into CVSCommits that represent
  at least one SVNCommit.

  Revisions are fed in one at a time through process_revision();
  flush() must be called once at the end to commit whatever is still
  pending."""
  def __init__(self):
    """Open the databases this aggregator reads and install the
    symbolings logger, persistence manager and default branches
    database on the shared Ctx() object."""
    self.metadata_db = Database(METADATA_DB, DB_OPEN_READ)
    # The last-revisions database is only consulted (see
    # attempt_to_commit_symbols) when symbols are being converted.
    if not Ctx().trunk_only:
      self.last_revs_db = Database(SYMBOL_LAST_CVS_REVS_DB, DB_OPEN_READ)
    # Pending CVSCommits.  Keys are metadata digests, possibly with
    # '-' characters appended once a commit has been closed to
    # further changes (see process_revision).
    self.cvs_commits = {}
    # Symbols waiting to be closed; only the keys are meaningful.
    self.pending_symbols = {}
    # A list of symbols for which we've already encountered the last
    # CVSRevision that is a source for that symbol.  That is, the
    # final fill for this symbol has been done, and we never need to
    # fill it again.
    self.done_symbols = [ ]

    # This variable holds the most recently created primary svn_commit
    # object.  CVSRevisionAggregator maintains this variable merely
    # for its date, so that it can set dates for the SVNCommits
    # created in self.attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = Database(DEFAULT_BRANCHES_DB, DB_OPEN_READ)


  def process_revision(self, c_rev):
    """Add the CVSRevision C_REV to the pending CVSCommit that shares
    its digest (creating that CVSCommit if necessary), after first
    processing every pending CVSCommit whose t_max lies more than
    COMMIT_THRESHOLD before C_REV's timestamp."""
    # Each time we read a new line, we scan the commits we've
    # accumulated so far to see if any are ready for processing now.
    ready_queue = [ ]
    for digest_key, cvs_commit in self.cvs_commits.items():
      if cvs_commit.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
        ready_queue.append(cvs_commit)
        del self.cvs_commits[digest_key]
        continue
      # If the inbound commit is on the same file as a pending commit,
      # close the pending commit to further changes. Don't flush it though,
      # as there may be other pending commits dated before this one.
      # ### ISSUE: the has_file() check below is not optimal.
      # It does fix the dataloss bug where revisions would get lost
      # if checked in too quickly, but it can also break apart the
      # commits. The correct fix would require tracking the dependencies
      # between change sets and committing them in proper order.
      if cvs_commit.has_file(c_rev.fname):
        unused_id = digest_key + '-'
        # Find a string that is not already a key in
        # the self.cvs_commits dict
        while self.cvs_commits.has_key(unused_id):
          unused_id = unused_id + '-'
        # Re-filing the commit under a key that no digest can match
        # is what closes it to further changes.
        self.cvs_commits[unused_id] = cvs_commit
        del self.cvs_commits[digest_key]

    # Add this item into the set of still-available commits.
    if self.cvs_commits.has_key(c_rev.digest):
      cvs_commit = self.cvs_commits[c_rev.digest]
    else:
      author, log = self.metadata_db[c_rev.digest]
      self.cvs_commits[c_rev.digest] = CVSCommit(c_rev.digest,
                                                 author, log)
      cvs_commit = self.cvs_commits[c_rev.digest]
    cvs_commit.add_revision(c_rev)

    # If there are any elements in the ready_queue at this point, they
    # need to be processed, because this latest rev couldn't possibly
    # be part of any of them.  Sort them into time-order, then process
    # 'em.
    ready_queue.sort()

    # Make sure we attempt_to_commit_symbols for this c_rev, even if no
    # commits are ready.
    if len(ready_queue) == 0:
      self.attempt_to_commit_symbols(ready_queue, c_rev)

    # Iterate over a copy: each commit is removed from ready_queue
    # before attempt_to_commit_symbols() looks at the remaining ones.
    for cvs_commit in ready_queue[:]:
      self.latest_primary_svn_commit \
          = cvs_commit.process_revisions(self.done_symbols)
      ready_queue.remove(cvs_commit)
      self.attempt_to_commit_symbols(ready_queue, c_rev)

  def flush(self):
    """Commit anything left in self.cvs_commits.  Then inform the
    SymbolingsLogger that all commits are done."""

    # Pair each commit with its dictionary key so that the entry can
    # be deleted once the commit has been processed.
    ready_queue = [ ]
    for k, v in self.cvs_commits.items():
      ready_queue.append((v, k))

    ready_queue.sort()
    for cvs_commit_tuple in ready_queue[:]:
      self.latest_primary_svn_commit = \
        cvs_commit_tuple[0].process_revisions(self.done_symbols)
      ready_queue.remove(cvs_commit_tuple)
      del self.cvs_commits[cvs_commit_tuple[1]]
      self.attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
    """
    This function generates 1 SVNCommit for each symbol in
    self.pending_symbols that doesn't have an opening CVSRevision in
    either QUEUED_COMMITS or self.cvs_commits.values().

    If C_REV is not None, then we first add to self.pending_symbols
    any symbols from C_REV that C_REV is the last CVSRevision for.

    Each symbol committed here is appended to self.done_symbols and
    removed from self.pending_symbols.
    """
    # If we're not doing a trunk-only conversion, get the symbolic
    # names that this c_rev is the last *source* CVSRevision for and
    # add them to those left over from previous passes through the
    # aggregator.
    if c_rev and not Ctx().trunk_only:
      for sym in self.last_revs_db.get(c_rev.unique_key(), []):
        self.pending_symbols[sym] = None

    # Make a list of all symbols that still have *source* CVSRevisions
    # in the pending commit queue (self.cvs_commits).
    open_symbols = {}
    for sym in self.pending_symbols.keys():
      for cvs_commit in self.cvs_commits.values() + queued_commits:
        if cvs_commit.opens_symbolic_name(sym):
          open_symbols[sym] = None
          break

    # Sort the pending symbols so that we will always process the
    # symbols in the same order, regardless of the order in which the
    # dict hashing algorithm hands them back to us.  We do this so
    # that our tests will get the same results on all platforms.
    sorted_pending_symbols_keys = self.pending_symbols.keys()
    sorted_pending_symbols_keys.sort()
    for sym in sorted_pending_symbols_keys:
      if open_symbols.has_key(sym): # sym is still open--don't close it.
        continue
      svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
      svn_commit.set_symbolic_name(sym)
      # NOTE(review): this dereferences self.latest_primary_svn_commit,
      # which is None until a primary commit has been processed --
      # confirm a symbol can never be closed before that point.
      svn_commit.set_date(self.latest_primary_svn_commit.get_date())
      svn_commit.flush()
      self.done_symbols.append(sym)
      del self.pending_symbols[sym]
class SymbolingsReader:
  """Interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  Does the heavy lifting of finding and returning
  the correct opening and closing Subversion revision numbers for a
  given symbolic name."""
  def __init__(self):
    """Open SYMBOL_OPENINGS_CLOSINGS_SORTED for reading and copy the
    offsets database into memory."""
    self.symbolings = open(SYMBOL_OPENINGS_CLOSINGS_SORTED, 'r')
    # The offsets database is really small and is read and written a
    # fair bit, so keep a copy of it in a plain dictionary.
    offsets_db = Database(SYMBOL_OFFSETS_DB, DB_OPEN_READ)
    offsets = { }
    for key in offsets_db.db.keys():
      offsets[key] = offsets_db[key]
    self.offsets = offsets

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Return a new SymbolicNameFillingGuide for SYMBOLIC_NAME that
    covers the openings and closings up to and including SVN_REVNUM.

    If an opening rev is encountered in this fill but the
    corresponding closing rev takes place later than SVN_REVNUM, the
    closing is not passed to the SymbolicNameFillingGuide in this
    fill (and is discarded when encountered in a later fill).  This
    is perfectly fine, because a valid fill can still be done without
    the closing--we always try to fill what we can as soon as we
    can."""
    guide = SymbolicNameFillingGuide(symbolic_name)

    # A branch may start with a file that was added on a branch, in
    # which case there is no offset recorded for the symbol.
    if not self.offsets.has_key(symbolic_name):
      return guide

    # Start reading at the offset recorded for symbolic_name.
    symbolings = self.symbolings
    symbolings.seek(self.offsets[symbolic_name])

    while 1:
      fpos = symbolings.tell()
      line = symbolings.readline().rstrip()
      if not line:
        break
      name, revnum_field, kind, svn_path = line.split(" ", 3)
      revnum = int(revnum_field)
      if name != symbolic_name or revnum > svn_revnum:
        break
      guide.register(svn_path, revnum, kind)

    # If anything that was read got used, remember the offset of the
    # beginning of the line read last, so that the next fill for this
    # symbol resumes there.
    if not guide.is_empty():
      self.offsets[symbolic_name] = fpos

    guide.make_node_tree()
    return guide
2204 class SymbolicNameFillingGuide:
2205 """A SymbolicNameFillingGuide is essentially a node tree
2206 representing the source paths to be copied to fill
2207 self.symbolic_name in the current SVNCommit.
2209 After calling self.register() on a series of openings and closings,
2210 call self.make_node_tree() to prepare self.node_tree for
2211 examination. See the docstring for self.make_node_tree() for
2212 details on the structure of self.node_tree.
2214 By walking self.node_tree and calling self.get_best_revnum() on each
2215 node, the caller can determine what subversion revision number to
2216 copy the path corresponding to that node from. self.node_tree
2217 should be treated as read-only.
2219 The caller can then descend to sub-nodes to see if their "best
2220 revnum" differs from their parents' and if it does, take appropriate
2221 actions to "patch up" the subtrees."""
2222 def __init__(self, symbolic_name):
2223 """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
2224 prepares it for receiving openings and closings.
2226 Returns a fully functional and armed SymbolicNameFillingGuide
2227 object."""
2228 self.name = symbolic_name
2230 self.opening_key = "/o"
2231 self.closing_key = "/c"
2233 # A dictionary of SVN_PATHS and SVN_REVNUMS whose format is:
2235 # { svn_path : { self.opening_key : svn_revnum,
2236 # self.closing_key : svn_revnum }
2237 # ...}
2238 self.things = { }
2240 # The key for the root node of the node tree
2241 self.root_key = '0'
2242 # The dictionary that holds our node tree, seeded with the root key.
2243 self.node_tree = { self.root_key : { } }
def get_best_revnum(self, node, preferred_revnum):
  """Determine the best subversion revision number to use when
  copying the source tree beginning at NODE.

  Returns a tuple (REVNUM, MAX_SCORE): the chosen subversion revision
  number and the score it achieved.

  PREFERRED_REVNUM is passed to self._best_rev and used to
  calculate the best_revnum.

  If no revision can be determined, an error naming this guide's
  symbolic name is written to stderr and the program exits with
  status 1."""
  # Aggregate openings and closings from the rev tree
  openings = self._list_revnums_for_key(node, self.opening_key)
  closings = self._list_revnums_for_key(node, self.closing_key)

  # Score the lists
  scores = self._score_revisions(self._sum_revnum_counts(openings),
                                 self._sum_revnum_counts(closings))

  revnum, max_score = self._best_rev(scores, preferred_revnum)

  if revnum == SVN_INVALID_REVNUM:
    # self.name is the symbolic name being filled.  The bare 'name'
    # used here before is not defined in this scope, so the error
    # path raised NameError instead of printing the message.
    sys.stderr.write(error_prefix + ": failed to find a revision "
                     + "to copy from when copying %s\n" % self.name)
    sys.exit(1)
  return revnum, max_score
def _best_rev(self, scores, preferred_rev):
  """Pick the revision with the highest score from SCORES, a list as
  returned by _score_revisions(), and return it together with that
  score as a tuple (REV, MAX_SCORE).

  When several revisions share the maximum score the oldest one
  wins, unless PREFERRED_REV is one of the candidates, in which case
  PREFERRED_REV is chosen."""
  if preferred_rev is None:
    # Comparison order of different types is arbitrary.  Do not
    # expect None to compare less than int values below.
    # In Python 2.3 None compares with ints like negative infinity.
    # In Python 2.0 None compares with ints like positive infinity.
    preferred_rev = SVN_INVALID_REVNUM

  best_rev = SVN_INVALID_REVNUM
  best_score = 0
  score_at_preferred = -1
  for revnum, score in scores:
    if score > best_score:
      best_rev, best_score = revnum, score
    # The score in effect at preferred_rev is that of the last
    # listed revision which is not newer than it.
    if revnum <= preferred_rev:
      score_at_preferred = score

  if score_at_preferred == best_score:
    return preferred_rev, best_score
  return best_rev, best_score
2297 def _score_revisions(self, openings, closings):
2298 """Return a list of revisions and scores based on OPENINGS and
2299 CLOSINGS. The returned list looks like:
2301 [(REV1 SCORE1), (REV2 SCORE2), ...]
2303 where REV2 > REV1. OPENINGS and CLOSINGS are the values of
2304 self.opening__key and self.closing_key from some file or
2305 directory node, or else None.
2307 Each score indicates that copying the corresponding revision (or
2308 any following revision up to the next revision in the list) of the
2309 object in question would yield that many correct paths at or
2310 underneath the object. There may be other paths underneath it
2311 which are not correct and would need to be deleted or recopied;
2312 those can only be detected by descending and examining their
2313 scores.
2315 If OPENINGS is false, return the empty list."""
2316 # First look for easy outs.
2317 if not openings:
2318 return []
2320 # Must be able to call len(closings) below.
2321 if closings is None:
2322 closings = []
2324 # No easy out, so wish for lexical closures and calculate the scores :-).
2325 scores = []
2326 opening_score_accum = 0
2327 for i in range(len(openings)):
2328 opening_rev, opening_score = openings[i]
2329 opening_score_accum = opening_score_accum + opening_score
2330 scores.append((opening_rev, opening_score_accum))
2331 min = 0
2332 for i in range(len(closings)):
2333 closing_rev, closing_score = closings[i]
2334 done_exact_rev = None
2335 insert_index = None
2336 insert_score = None
2337 for j in range(min, len(scores)):
2338 score_rev, score = scores[j]
2339 if score_rev >= closing_rev:
2340 if not done_exact_rev:
2341 if score_rev > closing_rev:
2342 insert_index = j
2343 insert_score = scores[j-1][1] - closing_score
2344 done_exact_rev = 1
2345 scores[j] = (score_rev, score - closing_score)
2346 else:
2347 min = j + 1
2348 if not done_exact_rev:
2349 scores.append((closing_rev,scores[-1][1] - closing_score))
2350 if insert_index is not None:
2351 scores.insert(insert_index, (closing_rev, insert_score))
2352 return scores
2354 def _sum_revnum_counts(self, rev_list):
2355 """Takes an array of revisions (REV_LIST), for example:
2357 [21, 18, 6, 49, 39, 24, 24, 24, 24, 24, 24, 24]
2359 and adds up every occurrence of each revision and returns a sorted
2360 array of tuples containing (svn_revnum, count):
2362 [(6, 1), (18, 1), (21, 1), (24, 7), (39, 1), (49, 1)]
2364 s = {}
2365 for k in rev_list: # Add up the scores
2366 if s.has_key(k):
2367 s[k] = s[k] + 1
2368 else:
2369 s[k] = 1
2370 a = s.items()
2371 a.sort()
2372 return a
2374 def _list_revnums_for_key(self, node, revnum_type_key):
2375 """Scan self.node_tree and return a list of all the revision
2376 numbers (including duplicates) contained in REVNUM_TYPE_KEY values
2377 for all leaf nodes at and under NODE.
2379 REVNUM_TYPE_KEY should be either self.opening_key or
2380 self.closing_key."""
2381 revnums = []
2383 # If the node has self.opening_key, it must be a leaf node--all
2384 # leaf nodes have at least an opening key (although they may not
2385 # have a closing key. Fetch revnum and return
2386 if (self.node_tree[node].has_key(self.opening_key) and
2387 self.node_tree[node].has_key(revnum_type_key)):
2388 revnums.append(self.node_tree[node][revnum_type_key])
2389 return revnums
2391 for key, node_contents in self.node_tree[node].items():
2392 if key[0] == '/':
2393 continue
2394 revnums = revnums + \
2395 self._list_revnums_for_key(node_contents, revnum_type_key)
2396 return revnums
def register(self, svn_path, svn_revnum, type):
  """Record an opening or closing revision for this
  SymbolicNameFillingGuide.  SVN_PATH is the source path that needs
  to be copied into self.symbolic_name, and SVN_REVNUM is either the
  first svn revision number that can be copied from (the opening),
  or the last (not inclusive) svn revision number that can be copied
  from (the closing).  TYPE says whether this is an opening or a
  closing.

  The opening for a given SVN_PATH must be registered before its
  closing for the closing to have any effect... a closing that
  arrives before the corresponding opening is discarded.

  It is not necessary to register a closing for every opening.
  """
  # An opening is always recorded, replacing any earlier entry for
  # the same path.
  if type == OPENING:
    self.things[svn_path] = {self.opening_key: svn_revnum}
    return

  # A closing only counts once the opening for its path is known.
  if type != CLOSING or not self.things.has_key(svn_path):
    return

  # With a non-trunk default branch there may be several closings;
  # only the first one encountered is kept.
  path_revnums = self.things[svn_path]
  if not path_revnums.has_key(self.closing_key):
    path_revnums[self.closing_key] = svn_revnum
def make_node_tree(self):
    """Generates the SymbolicNameFillingGuide's node tree from
    self.things.  Each leaf node maps self.opening_key to the earliest
    subversion revision from which this node/path may be copied; and
    optionally maps self.closing_key to the subversion revision one
    higher than the last revision from which this node/path may be
    copied.  Intermediate nodes never contain opening or closing
    flags.

    The leaf of each path is recognized by its position in the path.
    (Comparing the component with the last component by identity is
    not reliable: an earlier component that is equal to the last one
    may be the very same string object and would then be mistaken
    for the leaf.)"""
    for svn_path, open_close in self.things.items():
        parent_key = self.root_key

        # Walk down the path, one node at a time.
        components = svn_path.split('/')
        last_index = len(components) - 1
        for i in range(len(components)):
            component = components[i]

            if not self.node_tree[parent_key].has_key(component):
                # First time we see this component under this parent.
                child_key = gen_key()
                self.node_tree[child_key] = { }
                self.node_tree[parent_key][component] = child_key
            else:
                child_key = self.node_tree[parent_key][component]

            # If this is the leaf, add the openings and closings.
            if i == last_index:
                self.node_tree[child_key] = open_close
            parent_key = child_key
def is_empty(self):
    """Tell whether nothing has been registered yet: return true if
    self.things holds no openings or closings, false otherwise."""
    return len(self.things) == 0
class FillSource:
    """A candidate source for filling a symbolic name, as used by the
    symbol filler in SVNRepositoryMirror.

    A source starts out unscored; set_score() must be called before
    two sources can be compared (and therefore sorted)."""

    def __init__(self, prefix, key):
        """Create an unscored fill source from PREFIX and KEY."""
        self.prefix = prefix
        self.key = key
        # Both stay None until set_score() is called.
        self.score = None
        self.revnum = None

    def set_score(self, score, revnum):
        """Remember SCORE and REVNUM for this source."""
        self.score = score
        self.revnum = revnum

    def __cmp__(self, other):
        """Order FillSources by descending score, so that sorting a
        list puts the highest score first.  Raises TypeError if either
        side has not been scored."""
        for candidate in (self, other):
            if candidate.score is None:
                raise TypeError('Tried to compare unscored FillSource')
        # Arguments are swapped on purpose: higher scores sort first.
        return cmp(other.score, self.score)
2486 class SVNRepositoryMirror:
2487 """Mirror a Subversion Repository as it is constructed, one
2488 SVNCommit at a time. The mirror is skeletal; it does not contain
2489 file contents. The creation of a dumpfile or Subversion repository
2490 is handled by delegates. See self.add_delegate method for how to
2491 set delegates.
2493 The structure of the repository is kept in two databases and one
2494 hash. The revs_db database maps revisions to root node keys, and
2495 the nodes_db database maps node keys to nodes. A node is a hash
2496 from directory names to keys. Both the revs_db and the nodes_db are
2497 stored on disk and each access is expensive.
2499 The nodes_db database only has the keys for old revisions. The
2500 revision that is being constructed is kept in memory in the new_nodes
2501 hash which is cheap to access.
2503 You must invoke _start_commit between SVNCommits.
2505 *** WARNING *** All path arguments to methods in this class CANNOT
2506 have leading or trailing slashes.
class SVNRepositoryMirrorPathExistsError(Exception):
    """Raised when a path is added to the repository mirror although
    that path already exists in the youngest revision of the
    repository."""
class SVNRepositoryMirrorUnexpectedOperationError(Exception):
    """Raised when a CVSRevision carries an operation (OP) value that
    the mirror does not expect."""
class SVNRepositoryMirrorInvalidFillOperationError(Exception):
    """Raised when a fill receives an empty SymbolicNameFillingGuide
    for a branch that already exists."""
def __init__(self):
    """Set up the SVNRepositoryMirror and prepare it for SVNCommits.

    Opens the revisions and nodes databases with DB_OPEN_NEW,
    registers both database names with Cleanup() for pass8, and
    starts the in-memory state at revision 0 with no root node.
    Unless Ctx().trunk_only is set, also opens the tags database and
    a SymbolingsReader; both are used when filling symbolic names."""
    # Delegates are attached later through add_delegate().
    self.delegates = [ ]

    # This corresponds to the 'revisions' table in a Subversion fs.
    self.revs_db = Database(SVN_MIRROR_REVISIONS_DB, DB_OPEN_NEW)
    Cleanup().register(SVN_MIRROR_REVISIONS_DB, pass8)

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track metadata, not file contents.)
    self.nodes_db = Database(SVN_MIRROR_NODES_DB, DB_OPEN_NEW)
    Cleanup().register(SVN_MIRROR_NODES_DB, pass8)

    # Start at revision 0 without a root node.  It will be created
    # by _open_writable_root_node.
    self.youngest = 0
    self.new_root_key = None
    self.new_nodes = { }

    if not Ctx().trunk_only:
        ###PERF IMPT: Suck this into memory.
        self.tags_db = TagsDatabase(DB_OPEN_READ)
        self.symbolings_reader = SymbolingsReader()
def _initialize_repository(self, date):
    """Initialize the repository by creating the directories for
    trunk, tags, and branches.  This method should only be called
    after all delegates are added to the repository mirror.

    DATE is passed to the set_date() method of the SVNCommit that
    represents the initialization revision (revision 1).  The
    branches and tags directories are skipped when Ctx().trunk_only
    is set."""
    # Make a 'fake' SVNCommit so we can take advantage of the revprops
    # magic therein
    svn_commit = SVNCommit("Initialization", 1)
    svn_commit.set_date(date)
    svn_commit.set_log_msg("New repository initialized by cvs2svn.")

    self._start_commit(svn_commit)
    self._mkdir(Ctx().trunk_base)
    if not Ctx().trunk_only:
        self._mkdir(Ctx().branches_base)
        self._mkdir(Ctx().tags_base)
2566 def _start_commit(self, svn_commit):
2567 """Start a new commit."""
2568 if self.youngest > 0:
2569 self._end_commit()
2571 self.youngest = svn_commit.revnum
2572 self.new_root_key = None
2573 self.new_nodes = { }
2575 self._invoke_delegates('start_commit', svn_commit)
2577 def _end_commit(self):
2578 """Called at the end of each commit. This method copies the newly
2579 created nodes to the on-disk nodes db."""
2580 if self.new_root_key is None:
2581 # No changes were made in this revision, so we make the root node
2582 # of the new revision be the same as the last one.
2583 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
2584 else:
2585 self.revs_db[str(self.youngest)] = self.new_root_key
2586 # Copy the new nodes to the nodes_db
2587 for key, value in self.new_nodes.items():
2588 self.nodes_db[key] = value
2590 def _get_node(self, key):
2591 """Returns the node contents for KEY which may refer to either
2592 self.nodes_db or self.new_nodes."""
2593 if self.new_nodes.has_key(key):
2594 return self.new_nodes[key]
2595 else:
2596 return self.nodes_db[key]
2598 def _open_readonly_node(self, path, revnum):
2599 """Open a readonly node for PATH at revision REVNUM. Returns the
2600 node key and node contents if the path exists, else (None, None)."""
2601 # Get the root key
2602 if revnum == self.youngest:
2603 if self.new_root_key is None:
2604 node_key = self.revs_db[str(self.youngest - 1)]
2605 else:
2606 node_key = self.new_root_key
2607 else:
2608 node_key = self.revs_db[str(revnum)]
2610 for component in path.split('/'):
2611 node_contents = self._get_node(node_key)
2612 if not node_contents.has_key(component):
2613 return None
2614 node_key = node_contents[component]
2616 return node_key
2618 def _open_writable_root_node(self):
2619 """Open a writable root node. The current root node is returned
2620 immeditely if it is already writable. If not, create a new one by
2621 copying the contents of the root node of the previous version."""
2622 if self.new_root_key is not None:
2623 return self.new_root_key, self.new_nodes[self.new_root_key]
2625 if self.youngest < 2:
2626 new_contents = { }
2627 else:
2628 new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
2629 self.new_root_key = gen_key()
2630 self.new_nodes = { self.new_root_key: new_contents }
2632 return self.new_root_key, new_contents
def _open_writable_node(self, svn_path, create):
    """Return (key, contents) of a modifiable node for SVN_PATH.

    Every node along SVN_PATH that still lives only in self.nodes_db
    is given a new key and placed in self.new_nodes.  If CREATE is
    true, SVN_PATH and any missing directories are created;
    otherwise (None, None) is returned as soon as a component is
    found to be missing."""
    node_key, node_contents = self._open_writable_root_node()

    # Walk down the path, one node at a time.
    joined = None
    names = svn_path.split('/')
    final_index = len(names) - 1
    index = 0
    for name in names:
        joined = _path_join(joined, name)
        parent = node_contents
        if parent.has_key(name):
            # The component exists.
            node_key = parent[name]
            if self.new_nodes.has_key(node_key):
                node_contents = self.new_nodes[node_key]
            else:
                # Pull the node out of the nodes_db, but store it
                # under a new key.
                node_contents = self.nodes_db[node_key]
                node_key = gen_key()
                self.new_nodes[node_key] = node_contents
                parent[name] = node_key
        elif not create:
            # The component is missing and we were not asked to
            # create it, so we give up.
            return None, None
        else:
            # The component is missing, so we create it.
            node_contents = { }
            node_key = gen_key()
            self.new_nodes[node_key] = node_contents
            parent[name] = node_key
            # Only intermediate directories are announced here; the
            # final component is left to the caller.
            if index < final_index:
                self._invoke_delegates('mkdir', joined)
        index = index + 1

    return node_key, node_contents
2675 def _path_exists(self, path):
2676 """If PATH exists in self.youngest of the svn repository mirror,
2677 return true, else return None.
2679 PATH must not start with '/'."""
2680 return self._open_readonly_node(path, self.youngest) is not None
2682 def _fast_delete_path(self, parent_path, parent_contents, component):
2683 """Delete COMPONENT from the parent direcory PARENT_PATH with the
2684 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
2685 in PARENT_CONTENTS."""
2686 if parent_contents.has_key(component):
2687 del parent_contents[component]
2688 self._invoke_delegates('delete_path', _path_join(parent_path, component))
2690 def _delete_path(self, svn_path, should_prune=False):
2691 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
2692 all ancestor directories that are made empty when SVN_PATH is deleted.
2693 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
2695 NOTE: This function does *not* allow you delete top-level entries
2696 (like /trunk, /branches, /tags), nor does it prune upwards beyond
2697 those entries."""
2698 pos = svn_path.rfind('/')
2699 parent_path = svn_path[:pos]
2700 entry = svn_path[pos+1:]
2701 parent_key, parent_contents = self._open_writable_node(parent_path, False)
2702 if parent_key is not None:
2703 self._fast_delete_path(parent_path, parent_contents, entry)
2704 # The following recursion makes pruning an O(n^2) operation in the
2705 # worst case (where n is the depth of SVN_PATH), but the worst case
2706 # is probably rare, and the constant cost is pretty low. Another
2707 # drawback is that we issue a delete for each path and not just
2708 # a single delete for the topmost directory pruned.
2709 if (should_prune and len(parent_contents) == 0 and
2710 parent_path.find('/') != -1):
2711 self._delete_path(parent_path, True)
def _mkdir(self, path):
    """Create PATH in the repository mirror at the youngest revision.

    Missing parent directories are created as well (see
    _open_writable_node), and the delegates' 'mkdir' method is
    invoked for PATH."""
    self._open_writable_node(path, True)
    self._invoke_delegates('mkdir', path)
def _change_path(self, cvs_rev):
    """Register a change in self.youngest for the CVS_REV's svn_path
    in the repository mirror.

    The mirror itself is left untouched; the change is only forwarded
    to the delegates' 'change_path' method."""
    # We do not have to update the nodes because our mirror is only
    # concerned with the presence or absence of paths, and a file
    # content change does not cause any path changes.
    self._invoke_delegates('change_path', cvs_rev)
def _add_path(self, cvs_rev):
    """Add the CVS_REV's svn_path to the repository mirror.

    The path (and any missing parent directories) is created in the
    revision under construction, then the delegates' 'add_path'
    method is invoked with CVS_REV."""
    self._open_writable_node(cvs_rev.svn_path, True)
    self._invoke_delegates('add_path', cvs_rev)
def _copy_path(self, src_path, dest_path, src_revnum):
    """Copy SRC_PATH as of subversion revision SRC_REVNUM to DEST_PATH.

    In the youngest revision of the repository, DEST_PATH's parent
    *must* exist, but DEST_PATH itself *cannot* exist; if it does,
    SVNRepositoryMirrorPathExistsError is raised.

    Return the node key and the contents (a dictionary) of the new
    node at DEST_PATH."""
    # Look up the node that is being copied.
    source_key = self._open_readonly_node(src_path, src_revnum)
    source_contents = self._get_node(source_key)

    # Split DEST_PATH into its parent directory and its basename.
    slash = dest_path.rindex('/')
    parent_path, basename = dest_path[:slash], dest_path[slash+1:]
    parent_contents = self._open_writable_node(parent_path, False)[1]

    if parent_contents.has_key(basename):
        raise self.SVNRepositoryMirrorPathExistsError(
            ("Attempt to add path '%s' to repository mirror " % dest_path)
            + "when it already exists in the mirror.")

    parent_contents[basename] = source_key
    self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

    # This is a cheap copy: the destination simply refers to the
    # source node, so the source's key and contents are also the
    # destination's.
    return source_key, source_contents
def _fill_symbolic_name(self, svn_commit):
    """Performs all copies necessary to create as much of the tag
    or branch SVN_COMMIT.symbolic_name as possible given the current
    revision of the repository mirror.

    The symbolic name is guaranteed to exist in the Subversion
    repository by the end of this call, even if there are no paths
    under it.

    Raises RuntimeError if the filling guide contains a top-level
    source that is neither Ctx().trunk_base nor Ctx().branches_base,
    and SVNRepositoryMirrorInvalidFillOperationError if the filling
    guide is empty although the branch already exists."""
    symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
        svn_commit.symbolic_name, self.youngest)

    # Create the list of sources for the symbolic name.  All source
    # prefixes must be direct sources for the destination, i.e. we
    # must have 'trunk' and 'branches/my_branch' and not just
    # 'branches'.
    sources = []
    for entry, key in symbol_fill.node_tree[symbol_fill.root_key].items():
        if entry == Ctx().trunk_base:
            sources.append(FillSource(entry, key))
        elif entry == Ctx().branches_base:
            for entry2, key2 in symbol_fill.node_tree[key].items():
                sources.append(FillSource(entry + '/' + entry2, key2))
        else:
            # Should never happen.  Raise an explicit error: a bare
            # 'raise' has no active exception to re-raise here and
            # would only produce an unrelated, confusing failure.
            raise RuntimeError("Unexpected top-level source '%s' while "
                               "filling symbolic name '%s'."
                               % (entry, svn_commit.symbolic_name))
    if self.tags_db.has_key(svn_commit.symbolic_name):
        dest_prefix = _path_join(Ctx().tags_base, svn_commit.symbolic_name)
    else:
        dest_prefix = _path_join(Ctx().branches_base,
                                 svn_commit.symbolic_name)

    if sources:
        dest_key = self._open_writable_node(dest_prefix, False)[0]
        self._fill(symbol_fill, dest_prefix, dest_key, sources)
    else:
        # We can only get here for a branch whose first commit is an add
        # (as opposed to a copy).
        dest_path = Ctx().branches_base + '/' + symbol_fill.name
        if not self._path_exists(dest_path):
            # If our symbol_fill was empty, that means that our first
            # commit on the branch was to a file added on the branch, and
            # that this is our first fill of that branch.
            #
            # This case is covered by test 16.
            #
            # ...we create the branch by copying trunk from our
            # current revision number minus 1
            source_path = Ctx().trunk_base
            entries = self._copy_path(source_path, dest_path,
                                      svn_commit.revnum - 1)[1]
            # Now since we've just copied trunk to a branch that's
            # *supposed* to be empty, we delete any entries in the
            # copied directory.
            for entry in entries.keys():
                del_path = dest_path + '/' + entry
                # Delete but don't prune.
                self._delete_path(del_path)
        else:
            msg = "Error filling branch '" + symbol_fill.name + "'.\n"
            msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
            msg = msg + "attempted to create a branch that already exists."
            raise self.SVNRepositoryMirrorInvalidFillOperationError(msg)
def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
          path = None, parent_source_prefix = None,
          preferred_revnum = None, prune_ok = None):
    """Fill the tag or branch at DEST_PREFIX + PATH with items from
    SOURCES, and recurse into the child items.

    SYMBOL_FILL is the filling guide for the symbolic name; its
    node_tree and get_best_revnum() are consulted here.

    DEST_PREFIX is the prefix of the destination directory, e.g.
    'tags/my_tag' or 'branches/my_branch', and SOURCES is a non-empty
    list of FillSource instances that are candidates to be copied to
    the destination.  SOURCES is scored and sorted in place.  DEST_KEY
    is the node key of the destination, or None if the destination
    does not yet exist.

    PATH is the path relative to DEST_PREFIX.  If PATH is None, we
    are at the top level, e.g. 'tags/my_tag'.

    PARENT_SOURCE_PREFIX is the source prefix that was used to copy
    the parent directory, and PREFERRED_REVNUM is an int which is the
    source revision number that the caller (who may have copied KEY's
    parent) used to perform its copy.  If PREFERRED_REVNUM is None,
    then no revision is preferable to any other (which probably means
    that no copies have happened yet).

    PRUNE_OK means that a copy has been made in this recursion, and
    it's safe to prune directories that are not in
    SYMBOL_FILL.node_tree, provided that said directory has a source
    prefix of one of the PARENT_SOURCE_PREFIX.

    PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
    should only be passed in by recursive calls."""
    # Calculate scores and revnums for all sources
    for source in sources:
        src_revnum, score = symbol_fill.get_best_revnum(source.key,
                                                        preferred_revnum)
        source.set_score(score, src_revnum)

    # Sort the sources in descending score order so that we will make
    # an eventual copy from the source with the highest score.
    # (FillSource.__cmp__ provides the descending order.)
    sources.sort()
    copy_source = sources[0]

    src_path = _path_join(copy_source.prefix, path)
    dest_path = _path_join(dest_prefix, path)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    do_copy = 0
    if dest_key is None:
        # The destination does not exist yet, so it must be copied.
        do_copy = 1
    elif prune_ok and (parent_source_prefix != copy_source.prefix or
                       copy_source.revnum != preferred_revnum):
        # We are about to replace the destination, so we need to remove
        # it before we perform the copy.
        self._delete_path(dest_path)
        do_copy = 1

    if do_copy:
        dest_key, dest_entries = self._copy_path(src_path, dest_path,
                                                 copy_source.revnum)
        # From here on (including the recursive calls below) a copy
        # has been made, so pruning is allowed.
        prune_ok = 1
    else:
        dest_entries = self._get_node(dest_key)

    # Create the SRC_ENTRIES hash from SOURCES.  The keys are path
    # elements and the values are lists of FillSource instances where
    # this path element exists.
    src_entries = {}
    for source in sources:
        for entry, key in symbol_fill.node_tree[source.key].items():
            if entry[0] == '/': # Skip flags
                continue
            if not src_entries.has_key(entry):
                src_entries[entry] = []
            src_entries[entry].append(FillSource(source.prefix, key))

    if prune_ok:
        # Delete the entries in DEST_ENTRIES that are not in src_entries.
        delete_list = [ ]
        for entry in dest_entries.keys():
            if not src_entries.has_key(entry):
                delete_list.append(entry)
        if delete_list:
            # Make sure we modify a node of the revision under
            # construction rather than one that is only in nodes_db.
            if not self.new_nodes.has_key(dest_key):
                dest_key, dest_entries = self._open_writable_node(dest_path, True)
            # Sort the delete list to get "diffable" dumpfiles.
            delete_list.sort()
            for entry in delete_list:
                self._fast_delete_path(dest_path, dest_entries, entry)

    # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
    src_keys = src_entries.keys()
    src_keys.sort()
    for src_key in src_keys:
        if dest_entries.has_key(src_key):
            next_dest_key = dest_entries[src_key]
        else:
            next_dest_key = None
        self._fill(symbol_fill, dest_prefix, next_dest_key,
                   src_entries[src_key], _path_join(path, src_key),
                   copy_source.prefix, sources[0].revnum, prune_ok)
def _synchronize_default_branch(self, svn_commit):
    """Bring trunk up to date with changes that were made on a
    non-trunk default branch.  See CVSCommit._post_commit() for
    details on why this is necessary.

    Raises SVNRepositoryMirrorUnexpectedOperationError if one of
    SVN_COMMIT's CVSRevisions has an unknown operation."""
    for cvs_rev in svn_commit.cvs_revs:
        op = cvs_rev.op
        if op in (OP_ADD, OP_CHANGE):
            # Replace whatever is on trunk with a copy from the branch.
            if self._path_exists(cvs_rev.svn_trunk_path):
                self._delete_path(cvs_rev.svn_trunk_path)
            self._copy_path(cvs_rev.svn_path, cvs_rev.svn_trunk_path,
                            svn_commit.motivating_revnum)
        elif op == OP_DELETE:
            # The file went away on the branch, so remove it from trunk.
            self._delete_path(cvs_rev.svn_trunk_path)
        else:
            raise self.SVNRepositoryMirrorUnexpectedOperationError(
                "Unknown CVSRevision operation '%s' in default branch sync."
                % cvs_rev.op)
def commit(self, svn_commit):
    """Add an SVNCommit to the SVNRepository, incrementing the
    Repository revision number, and changing the repository.  Invoke
    the delegates' start_commit() method (through _start_commit).

    Depending on SVN_COMMIT this either fills a symbolic name,
    synchronizes a non-trunk default branch with trunk, or applies
    the adds, changes and deletes of the commit's CVSRevisions."""

    # Revision 1 is reserved for the repository initialization, which
    # is performed right before the first real commit.
    if svn_commit.revnum == 2:
        self._initialize_repository(svn_commit.get_date())

    self._start_commit(svn_commit)

    if svn_commit.symbolic_name:
        Log().write(LOG_VERBOSE, "Filling symbolic name:",
                    svn_commit.symbolic_name)
        self._fill_symbolic_name(svn_commit)
    elif svn_commit.motivating_revnum:
        Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
                    % svn_commit.motivating_revnum)
        self._synchronize_default_branch(svn_commit)
    else: # This actually commits CVSRevisions
        if len(svn_commit.cvs_revs) > 1: plural = "s"
        else: plural = ""
        Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
                    % (len(svn_commit.cvs_revs), plural))
        for cvs_rev in svn_commit.cvs_revs:
            # See comment in CVSCommit._commit() for what this is all
            # about.  Note that although asking self._path_exists() is
            # somewhat expensive, we only do it if the first two (cheap)
            # tests succeed first.
            if not ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
                    and (cvs_rev.rev == "1.1.1.1")
                    and self._path_exists(cvs_rev.svn_path)):
                if cvs_rev.op == OP_ADD:
                    self._add_path(cvs_rev)
                elif cvs_rev.op == OP_CHANGE:
                    self._change_path(cvs_rev)

            # Deletions honor Ctx().prune: when it is set, directories
            # emptied by the deletion are removed as well.
            if cvs_rev.op == OP_DELETE:
                self._delete_path(cvs_rev.svn_path, Ctx().prune)
2982 def cleanup(self):
2983 """Callback for the Cleanup.register in self.__init__."""
2984 self.revs_db = None
2985 self.nodes_db = None
def add_delegate(self, delegate):
    """Adds DELEGATE to self.delegates.

    For every delegate you add, as soon as SVNRepositoryMirror
    performs a repository action method, SVNRepositoryMirror will call
    the delegate's corresponding repository action method.  Multiple
    delegates will be called in the order that they are added.  See
    SVNRepositoryMirrorDelegate for more information."""
    # Appending keeps the delegates in the order they were added.
    self.delegates.append(delegate)
2997 def _invoke_delegates(self, method, *args):
2998 """Iterate through each of our delegates, in the order that they
2999 were added, and call the delegate's method named METHOD with the
3000 arguments in ARGS."""
3001 for delegate in self.delegates:
3002 getattr(delegate, method)(*args)
def finish(self):
    """Finish the last commit, call the delegates' finish method and
    release the databases (see cleanup)."""
    # The last revision is still open; flush it before finishing.
    self._end_commit()
    self._invoke_delegates('finish')
    self.cleanup()
class SVNRepositoryMirrorDelegate:
    """Abstract superclass for any delegate to SVNRepositoryMirror.
    Subclasses must implement all of the methods below.

    Each method stands for the Subversion operation that its name
    implies, and every subclass carries that operation out in its own
    way.  For example, for the add_path method, the DumpfileDelegate
    would write out a "Node-add:" command to a Subversion dumpfile,
    the StdoutDelegate would merely print that the path is being
    added to the repository, and the RepositoryDelegate would
    actually cause the path to be added to the Subversion repository
    that it is creating."""

    def start_commit(self, svn_commit):
        """Do whatever is needed to start SVNCommit SVN_COMMIT; see
        subclass implementation for details."""
        raise NotImplementedError()

    def mkdir(self, path):
        """PATH is a string; see subclass implementation for
        details."""
        raise NotImplementedError()

    def add_path(self, c_rev):
        """C_REV is a CVSRevision; see subclass implementation for
        details."""
        raise NotImplementedError()

    def change_path(self, c_rev):
        """C_REV is a CVSRevision; see subclass implementation for
        details."""
        raise NotImplementedError()

    def delete_path(self, path):
        """PATH is a string; see subclass implementation for
        details."""
        raise NotImplementedError()

    def copy_path(self, src_path, dest_path, src_revnum):
        """SRC_PATH and DEST_PATH are both strings, and SRC_REVNUM is
        a subversion revision number (int); see subclass
        implementation for details."""
        raise NotImplementedError()

    def finish(self):
        """Do any cleanup that is necessary after all revisions have
        been committed."""
        raise NotImplementedError()
3061 class DumpfileDelegate(SVNRepositoryMirrorDelegate):
3062 """Create a Subversion dumpfile."""
def __init__(self):
    """Return a new DumpfileDelegate instance, attached to a dumpfile
    named according to Ctx().dumpfile, using Ctx().encoding.

    The dumpfile is opened for binary writing and the dumpfile header
    is written to it immediately.

    If Ctx().cvs_revnums is true, then set the 'cvs2svn:cvs-rev'
    property on files, when they are changed due to a corresponding
    CVS revision.

    If Ctx().mime_mapper is true, then it is a MimeMapper instance, used
    to determine whether or not to set the 'svn:mime-type' property on
    files.

    If Ctx().set_eol_style is true, then set 'svn:eol-style' to 'native'
    for files not marked with the CVS 'kb' flag.  (But see issue #39
    for how this might change.)"""
    # Take a snapshot of the relevant run-time options.
    self.dumpfile_path = Ctx().dumpfile
    self.set_cvs_revnum_properties = Ctx().cvs_revnums
    self.set_eol_style = Ctx().set_eol_style
    self.mime_mapper = Ctx().mime_mapper
    self.path_encoding = Ctx().encoding

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)
def _write_dumpfile_header(self, dumpfile):
    """Write the dumpfile format version header to DUMPFILE, a
    writable file object."""
    # Initialize the dumpfile with the standard headers.
    #
    # Since the CVS repository doesn't have a UUID, and the Subversion
    # repository will be created with one anyway, we don't specify a
    # UUID in the dumpfile
    dumpfile.write('SVN-fs-dump-format-version: 2\n\n')
3096 def _utf8_path(self, path):
3097 """Return a copy of PATH encoded in UTF-8. PATH is assumed to be
3098 encoded in self.path_encoding."""
3099 try:
3100 # Log messages can be converted with the 'replace' strategy,
3101 # but we can't afford any lossiness here.
3102 unicode_path = unicode(path, self.path_encoding, 'strict')
3103 return unicode_path.encode('utf-8')
3104 except UnicodeError:
3105 print "Unable to convert a path '%s' to internal encoding." % path
3106 print "Consider rerunning with (for example) '--encoding=latin1'"
3107 sys.exit(1)
3109 def start_commit(self, svn_commit):
3110 """Emit the start of SVN_COMMIT (an SVNCommit)."""
3112 self.revision = svn_commit.revnum
3114 # The start of a new commit typically looks like this:
3116 # Revision-number: 1
3117 # Prop-content-length: 129
3118 # Content-length: 129
3120 # K 7
3121 # svn:log
3122 # V 27
3123 # Log message for revision 1.
3124 # K 10
3125 # svn:author
3126 # V 7
3127 # jrandom
3128 # K 8
3129 # svn:date
3130 # V 27
3131 # 2003-04-22T22:57:58.132837Z
3132 # PROPS-END
3134 # Notice that the length headers count everything -- not just the
3135 # length of the data but also the lengths of the lengths, including
3136 # the 'K ' or 'V ' prefixes.
3138 # The reason there are both Prop-content-length and Content-length
3139 # is that the former includes just props, while the latter includes
3140 # everything. That's the generic header form for any entity in a
3141 # dumpfile. But since revisions only have props, the two lengths
3142 # are always the same for revisions.
3144 # Calculate the total length of the props section.
3145 props = svn_commit.get_revprops()
3146 total_len = 10 # len('PROPS-END\n')
3147 for propname in props.keys():
3148 if props[propname] is None:
3149 continue
3150 klen = len(propname)
3151 klen_len = len('K %d' % klen)
3152 vlen = len(props[propname])
3153 vlen_len = len('V %d' % vlen)
3154 # + 4 for the four newlines within a given property's section
3155 total_len = total_len + klen + klen_len + vlen + vlen_len + 4
3157 # Print the revision header and props
3158 self.dumpfile.write('Revision-number: %d\n'
3159 'Prop-content-length: %d\n'
3160 'Content-length: %d\n'
3161 '\n'
3162 % (self.revision, total_len, total_len))
3164 for propname in props.keys():
3165 if props[propname] is None:
3166 continue
3167 self.dumpfile.write('K %d\n'
3168 '%s\n'
3169 'V %d\n'
3170 '%s\n' % (len(propname),
3171 propname,
3172 len(props[propname]),
3173 props[propname]))
3175 self.dumpfile.write('PROPS-END\n')
3176 self.dumpfile.write('\n')
3178 def mkdir(self, path):
3179 """Emit the creation of directory PATH."""
3180 self.dumpfile.write("Node-path: %s\n"
3181 "Node-kind: dir\n"
3182 "Node-action: add\n"
3183 "Prop-content-length: 10\n"
3184 "Content-length: 10\n"
3185 "\n"
3186 "PROPS-END\n"
3187 "\n"
3188 "\n" % self._utf8_path(path))
def _add_or_change_path(self, c_rev, op):
  """Emit the addition or change corresponding to C_REV.

  OP is either the constant OP_ADD or OP_CHANGE.  Any other value is
  reported on stderr and ends the program with exit status 1.

  The node headers, the properties and the file text are written to
  self.dumpfile.  The text is read from a 'co' subprocess; if that
  command exits with a non-zero status, the program exits with a
  message containing the command, its status and its stderr output."""

  # Validate OP before doing anything else, so that a bad call never
  # leaves a freshly started 'co' child process behind.
  if op == OP_ADD:
    action = 'add'
  elif op == OP_CHANGE:
    action = 'change'
  else:
    sys.stderr.write("%s: _add_or_change_path() called with bad op ('%s')\n"
                     % (error_prefix, op))
    sys.exit(1)

  # We begin with only a "CVS revision" property.
  if self.set_cvs_revnum_properties:
    prop_contents = 'K 15\ncvs2svn:cvs-rev\nV %d\n%s\n' \
                    % (len(c_rev.rev), c_rev.rev)
  else:
    prop_contents = ''

  # Tack on the executableness, if any.
  if c_rev.file_executable:
    prop_contents = prop_contents + 'K 14\nsvn:executable\nV 1\n*\n'

  # If the file is marked as binary, it gets a default MIME type of
  # "application/octet-stream".  Otherwise, it gets a default EOL
  # style of "native".
  mime_type = None
  eol_style = None
  if c_rev.mode == 'b':
    mime_type = 'application/octet-stream'
  else:
    eol_style = 'native'

  # If using the MIME mapper, possibly override the default MIME
  # type and EOL style.
  if self.mime_mapper:
    mtype = self.mime_mapper.get_type_from_filename(c_rev.cvs_path)
    if mtype:
      mime_type = mtype
      if not mime_type.startswith("text/"):
        eol_style = None

  # Possibly set the svn:mime-type and svn:eol-style properties.
  if mime_type:
    prop_contents = prop_contents + ('K 13\nsvn:mime-type\nV %d\n%s\n' % \
                                     (len(mime_type), mime_type))
  if self.set_eol_style and eol_style:
    prop_contents = prop_contents + 'K 13\nsvn:eol-style\nV 6\nnative\n'

  # Calculate the property length (+10 for "PROPS-END\n")
  props_len = len(prop_contents) + 10

  ### FIXME: We ought to notice the -kb flag set on the RCS file and
  ### use it to set svn:mime-type.  See issue #39.
  pipe_cmd = 'co -q -x,v -p%s %s' \
             % (c_rev.rev, escape_shell_arg(c_rev.rcs_path()))
  pipe = Popen3(pipe_cmd, True)
  # 'co' gets no input from us.
  pipe.tochild.close()

  self.dumpfile.write('Node-path: %s\n'
                      'Node-kind: file\n'
                      'Node-action: %s\n'
                      'Prop-content-length: %d\n'
                      'Text-content-length: '
                      % (self._utf8_path(c_rev.svn_path),
                         action, props_len))

  # Remember where the placeholder headers start; the real values are
  # only known once the whole text has been streamed through.
  pos = self.dumpfile.tell()

  self.dumpfile.write('0000000000000000\n'
                      'Text-content-md5: 00000000000000000000000000000000\n'
                      'Content-length: 0000000000000000\n'
                      '\n')

  self.dumpfile.write(prop_contents + 'PROPS-END\n')

  # Insert the rev contents, calculating length and checksum as we go.
  checksum = md5.new()
  length = 0
  buf = pipe.fromchild.read(PIPE_READ_SIZE)
  while buf:
    checksum.update(buf)
    length = length + len(buf)
    self.dumpfile.write(buf)
    buf = pipe.fromchild.read(PIPE_READ_SIZE)
  pipe.fromchild.close()
  error_output = pipe.childerr.read()
  exit_status = pipe.wait()
  if exit_status:
    sys.exit("%s: The command '%s' failed with exit status: %s\n"
             "and the following output:\n"
             "%s" % (error_prefix, pipe_cmd, exit_status, error_output))

  # Go back to patch up the length and checksum headers:
  self.dumpfile.seek(pos, 0)
  # We left 16 zeros for the text length; replace them with the real
  # length, padded on the left with spaces:
  self.dumpfile.write('%16d' % length)
  # 16... + 1 newline + len('Text-content-md5: ') == 35
  self.dumpfile.seek(pos + 35, 0)
  self.dumpfile.write(checksum.hexdigest())
  # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
  self.dumpfile.seek(pos + 84, 0)
  # The content length is the length of property data, text data,
  # and any metadata around/inside them.
  self.dumpfile.write('%16d' % (length + props_len))
  # Jump back to the end of the stream
  self.dumpfile.seek(0, 2)

  # This record is done (write two newlines -- one to terminate
  # contents that weren't themselves newline-terminated, one to
  # provide a blank line for readability).
  self.dumpfile.write('\n\n')
def add_path(self, c_rev):
  """Write the dumpfile record that adds the file described by the
  CVSRevision C_REV."""
  self._add_or_change_path(c_rev, OP_ADD)
def change_path(self, c_rev):
  """Write the dumpfile record that changes the file described by the
  CVSRevision C_REV."""
  self._add_or_change_path(c_rev, OP_CHANGE)
def delete_path(self, path):
  """Write the dumpfile record that deletes PATH."""
  node_path = self._utf8_path(path)
  record = ('Node-path: %s\n' % node_path
            + 'Node-action: delete\n'
            + '\n')
  self.dumpfile.write(record)
def copy_path(self, src_path, dest_path, src_revnum):
  """Write the dumpfile record that copies SRC_PATH as of revision
  SRC_REVNUM to DEST_PATH."""
  # No "Node-kind:" header is written for copies; the loader ignores
  # it anyway and just uses the kind of the copy source.
  node_path = self._utf8_path(dest_path)
  copyfrom_path = self._utf8_path(src_path)
  record = ('Node-path: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: /%s\n'
            '\n') % (node_path, src_revnum, copyfrom_path)
  self.dumpfile.write(record)
def finish(self):
  """Close the dumpfile; called once all revisions have been
  committed."""
  dumpfile = self.dumpfile
  dumpfile.close()
class RepositoryDelegate(DumpfileDelegate):
  """Creates a new Subversion Repository.  DumpfileDelegate does all
  of the heavy lifting.

  Every commit is first written to a scratch dumpfile and then fed to
  a single 'svnadmin load' child process that is started in __init__
  and reaped in finish()."""

  def __init__(self):
    """Create the target repository (unless loading into an existing
    one), start 'svnadmin load' and send it the dumpfile header.  Exits
    the program if the loader pipe breaks while the header is sent."""
    self.svnadmin = Ctx().svnadmin
    self.target = Ctx().target
    # The repository path ends up inside shell command lines, so pass
    # it through escape_shell_arg() just like the RCS paths given to 'co'.
    quoted_target = escape_shell_arg(self.target)
    if not Ctx().existing_svnrepos:
      Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
      run_command('%s create %s %s' % (self.svnadmin, Ctx().bdb_txn_nosync
                                       and "--bdb-txn-nosync"
                                       or "", quoted_target))
    DumpfileDelegate.__init__(self)

    # This is 1 if a commit is in progress, otherwise None.
    self._commit_in_progress = None

    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = Popen3('%s load -q %s' % (self.svnadmin,
                                                 quoted_target),
                              True)
    # We never read the loader's stdout.
    self.loader_pipe.fromchild.close()
    try:
      self._write_dumpfile_header(self.loader_pipe.tochild)
    except IOError:
      self._report_loader_failure()

  def _report_loader_failure(self):
    """Copy svnadmin's stderr output to our stderr and exit with
    status 1.  Called when writing to the loader pipe fails."""
    sys.stderr.write("%s: svnadmin failed with the following output while "
                     "loading the dumpfile:\n" % (error_prefix))
    sys.stderr.write(self.loader_pipe.childerr.read())
    sys.exit(1)

  def _feed_pipe(self):
    """Feed the revision stored in the dumpfile to the svnadmin
    load pipe."""
    self.dumpfile.seek(0)
    while 1:
      data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
      if not len(data):
        break
      try:
        self.loader_pipe.tochild.write(data)
      except IOError:
        self._report_loader_failure()

  def start_commit(self, svn_commit):
    """Start a new commit.  If a commit is already in progress, load
    the dumpfile into the svn repository and empty it before the
    new commit is written into it."""
    if self._commit_in_progress:
      self._feed_pipe()
    self.dumpfile.seek(0)
    self.dumpfile.truncate()
    DumpfileDelegate.start_commit(self, svn_commit)
    self._commit_in_progress = 1

  def finish(self):
    """Loads the last commit into the repository, then waits for
    'svnadmin load' and exits with an error message if it reported a
    non-zero exit status."""
    self._feed_pipe()
    self.dumpfile.close()
    # Closing the loader's stdin tells it that the stream is complete.
    self.loader_pipe.tochild.close()
    error_output = self.loader_pipe.childerr.read()
    exit_status = self.loader_pipe.wait()
    if exit_status:
      sys.exit('%s: svnadmin load failed with exit status: %s\n'
               'and the following output:\n'
               '%s' % (error_prefix, exit_status, error_output))
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """A delegate that changes nothing on disk: it only reports, via
  Log(), each operation the SVNRepositoryMirror performs.  In other
  words, it announces that things are being done while doing nothing
  itself besides the announcing."""

  def __init__(self, total_revs):
    # Total number of revisions, shown as the "N" in "commit M / N".
    self.total_revs = total_revs

  def start_commit(self, svn_commit):
    """Report the Subversion revision number of the commit that is
    about to start."""
    separator = "=" * 60
    Log().write(LOG_VERBOSE, separator)
    progress = "Starting Subversion commit %d / %d" \
               % (svn_commit.revnum, self.total_revs)
    Log().write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Report the creation of directory PATH."""
    Log().write(LOG_VERBOSE, " New Directory", path)

  def add_path(self, c_rev):
    """Report the addition of c_rev.svn_path."""
    svn_path = c_rev.svn_path
    Log().write(LOG_VERBOSE, " Adding", svn_path)

  def change_path(self, c_rev):
    """Report the modification of c_rev.svn_path."""
    svn_path = c_rev.svn_path
    Log().write(LOG_VERBOSE, " Changing", svn_path)

  def delete_path(self, path):
    """Report the deletion of PATH."""
    Log().write(LOG_VERBOSE, " Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Report the copy of revision SRC_REVNUM of SRC_PATH to
    DEST_PATH, on two lines."""
    Log().write(LOG_VERBOSE, " Copying revision", src_revnum, "of", src_path)
    Log().write(LOG_VERBOSE, " to", dest_path)

  def finish(self):
    """Report that the repository creation is complete."""
    Log().write(LOG_VERBOSE, "Finished creating Subversion repository.")
    Log().write(LOG_QUIET, "Done.")
# This should be a local to pass1,
# but Python 2.0 does not support nested scopes.
OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
def pass1():
  """Pass 1: walk the CVS repository and parse every ',v' file into a
  CollectData instance, then write out the symbol database.

  Files that fail to parse are reported on stderr and collected; after
  the walk the program exits with an error summary if there were any,
  or if no ',v' file was found at all."""
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    """os.path.walk() callback: parse each ',v' file in DIRNAME into
    the CollectData passed as BATON."""
    cd = baton
    for fname in files:
      if fname[-2:] != ',v':
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname[-6:] == OS_SEP_PLUS_ATTIC:
        # drop the 'Attic' portion from the pathname for the canonical name.
        cd.set_fname(os.path.join(dirname[:-6], fname), pathname)
      else:
        cd.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)
      try:
        rcs_file = open(pathname, 'rb')
        # Close each RCS file as soon as it is parsed instead of
        # relying on garbage collection; a repository can hold a very
        # large number of them.
        try:
          cvs2svn_rcsparse.parse(rcs_file, cd)
        finally:
          rcs_file.close()
      except (cvs2svn_rcsparse.common.RCSParseError, ValueError, RuntimeError):
        err = "%s: '%s' is not a valid ,v file" \
              % (error_prefix, pathname)
        sys.stderr.write(err + '\n')
        cd.fatal_errors.append(err)
      except:
        # Anything else is unexpected: say which file triggered it and
        # let the exception propagate.
        Log().write(LOG_WARN, "Exception occurred while parsing %s" % pathname)
        raise

  os.path.walk(Ctx().cvsroot, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if len(cd.fatal_errors) > 0:
    sys.exit("Pass 1 complete.\n" + "=" * 75 + "\n"
             + "Error summary:\n"
             + "\n".join(cd.fatal_errors)
             + "\nExited due to fatal error(s).")

  if cd.found_valid_file is None:
    sys.exit("\nNo RCS files found in your CVS Repository!\n"
             + "Are you absolutely certain you are pointing cvs2svn\n"
             + "at a CVS repository?\n"
             + "\nExited due to fatal error(s).")

  Log().write(LOG_QUIET, "Done")
def pass2():
  """Pass 2: clean up the revision information.

  Checks the requested exclusions and forced tags/branches for
  consistency (exiting with status 1 on any error), creates the tags
  database, and then rewrites the revisions file into the 'clean'
  revisions file with excluded symbols removed, forced symbols
  converted and resynchronized timestamps applied."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Expand the exclusion regexps into the matching symbol names
  # (queried below with has_key()).
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write(" '%s'\n" % (blocker))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write(" '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = filter(is_not_forced, mismatches)
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write(" '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags.keys():
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp.  We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory.  really large repositories with whacky timestamps could
    ### bust this assumption.  should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit.  We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped.  We also make them a list because
    # we will dynamically expand the lower/upper bound as we find commits
    # that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      if resync.has_key(digest):
        resync[digest].append([t1_l, t1_u, t2])
      else:
        resync[digest] = [ [t1_l, t1_u, t2] ]

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    digests = resync.keys()
    for digest in digests:
      (resync[digest]).sort()

    return resync

  resync = read_resync(DATAFILE + RESYNC_SUFFIX)

  output = open(DATAFILE + CLEAN_REVS_SUFFIX, 'w')
  Cleanup().register(DATAFILE + CLEAN_REVS_SUFFIX, pass3)

  # Filter used to drop excluded tags and branches from a revision.
  # Defined once here rather than once per revision.
  def not_excluded(symbol, excludes=excludes):
    return not excludes.has_key(symbol)

  # process the revisions file, looking for items to clean up
  for line in fileinput.FileInput(DATAFILE + REVS_SUFFIX):
    c_rev = CVSRevision(Ctx(), line[:-1])

    # Skip this entire revision if it's on an excluded branch
    if excludes.has_key(c_rev.branch_name):
      continue

    # Remove all references to excluded tags and branches
    c_rev.branches = filter(not_excluded, c_rev.branches)
    c_rev.tags = filter(not_excluded, c_rev.tags)

    # Convert all branches that are forced to be tags
    for forced_tag in Ctx().forced_tags:
      if forced_tag in c_rev.branches:
        c_rev.branches.remove(forced_tag)
        c_rev.tags.append(forced_tag)

    # Convert all tags that are forced to be branches
    for forced_branch in Ctx().forced_branches:
      if forced_branch in c_rev.tags:
        c_rev.tags.remove(forced_branch)
        c_rev.branches.append(forced_branch)

    # From here on always write the re-serialized c_rev, never the raw
    # input line: the raw line would silently drop the symbol changes
    # made above.
    if not resync.has_key(c_rev.digest):
      output.write(str(c_rev) + "\n")
      continue

    # we have a hit.  see if this is "near" any of the resync records we
    # have recorded for this digest [of the log message].
    for record in resync[c_rev.digest]:
      if record[0] <= c_rev.timestamp <= record[1]:
        # bingo!  remap the time on this (record[2] is the new time).
        msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (relative_name(Ctx().cvsroot, c_rev.fname),
                 c_rev.rev, time.ctime(c_rev.timestamp),
                 record[2] - c_rev.timestamp)
        Log().write(LOG_VERBOSE, msg)

        # adjust the time range.  we want the COMMIT_THRESHOLD from the
        # bounds of the earlier/latest commit in this group.
        record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
        record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

        c_rev.timestamp = record[2]
        output.write(str(c_rev) + "\n")

        # stop looking for hits
        break
    else:
      # the file/rev did not need to have its time changed.
      output.write(str(c_rev) + "\n")

  # Flush everything to disk before the next pass sorts this file.
  output.close()
  Log().write(LOG_QUIET, "Done")
def pass3():
  """Pass 3: sort the cleaned revisions file into the sorted revisions
  file, and register the sorted file for cleanup."""
  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  clean_revs_path = DATAFILE + CLEAN_REVS_SUFFIX
  sorted_revs_path = DATAFILE + SORTED_REVS_SUFFIX
  sort_file(clean_revs_path, sorted_revs_path)
  Cleanup().register(sorted_revs_path, pass5)
  Log().write(LOG_QUIET, "Done")
def pass4():
  """Pass 4: read the sorted revisions and log each one in the
  CVSRevisionDatabase.

  Unless this is a trunk-only conversion, each revision is also logged
  in a LastSymbolicNameDatabase, which contains the last CVSRevision
  that is a source for each tag or branch."""
  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
  if Ctx().trunk_only:
    # A do-nothing stand-in, so that the loop below does not have to
    # test Ctx().trunk_only for every revision.
    class DummyLSNDB:
      def log_revision(*args):
        pass
      def create_database(*args):
        pass
    last_sym_name_db = DummyLSNDB()
  else:
    Log().write(LOG_QUIET,
                "and finding last CVS revisions for all symbolic names...")
    last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(DATAFILE + SORTED_REVS_SUFFIX)
  for line in sorted_revs:
    # Each line is one serialized revision plus its trailing newline.
    revision = CVSRevision(Ctx(), line[:-1])
    cvs_revs_db.log_revision(revision)
    last_sym_name_db.log_revision(revision)

  last_sym_name_db.create_database()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """Pass 5: generate the SVNCommit <-> CVSRevision mapping databases.

  Every sorted revision is handed to a CVSRevisionAggregator; in a
  trunk-only conversion, revisions that sit on a branch are skipped.
  CVSCommit._commit also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details."""
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  for line in fileinput.FileInput(DATAFILE + SORTED_REVS_SUFFIX):
    revision = CVSRevision(Ctx(), line[:-1])
    if Ctx().trunk_only and revision.branch_name is not None:
      # Branch revisions play no part in a trunk-only conversion.
      continue
    aggregator.process_revision(revision)
  aggregator.flush()

  Log().write(LOG_QUIET, "Done")
def pass6():
  """Pass 6: sort the symbol openings/closings file (skipped for
  trunk-only conversions) and register the sorted file for cleanup."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  trunk_only = Ctx().trunk_only
  if not trunk_only:
    sort_file(SYMBOL_OPENINGS_CLOSINGS, SYMBOL_OPENINGS_CLOSINGS_SORTED)
    Cleanup().register(SYMBOL_OPENINGS_CLOSINGS_SORTED, pass8)

  Log().write(LOG_QUIET, "Done")
def pass7():
  """Pass 7: record, for every symbolic name, the offset at which it
  first appears in the sorted openings/closings file.  Nothing is
  generated for trunk-only conversions."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(SYMBOL_OFFSETS_DB, DB_OPEN_NEW)
    Cleanup().register(SYMBOL_OFFSETS_DB, pass8)

    symbolings = open(SYMBOL_OPENINGS_CLOSINGS_SORTED, 'r')
    try:
      old_sym = ""
      while 1:
        # The offset must be taken before the line is read.
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only the symbol name is needed, but unpacking all three
        # fields still rejects lines that have fewer than three.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if not sym == old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      # Do not leave the handle for the garbage collector to close.
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Pass 8: replay every SVNCommit known to the PersistenceManager
  through an SVNRepositoryMirror.

  Unless this is a dry run, the mirror gets a RepositoryDelegate when
  a target repository was given and a DumpfileDelegate otherwise.  A
  StdoutDelegate is always attached."""
  next_revnum = 2 # Repository initialization is 1.
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  if (Ctx().target):
    delegate_class = RepositoryDelegate
    banner = "Starting Subversion Repository."
  else:
    delegate_class = DumpfileDelegate
    banner = "Starting Subversion Dumpfile."
  if not Ctx().dry_run:
    repos.add_delegate(delegate_class())
  Log().write(LOG_QUIET, banner)

  repos.add_delegate(StdoutDelegate(persistence_manager.total_revs() + 1))

  # Commits are fetched by consecutive revision number until the
  # persistence manager has none left.
  svn_commit = persistence_manager.get_svn_commit(next_revnum)
  while svn_commit:
    repos.commit(svn_commit)
    next_revnum = next_revnum + 1
    svn_commit = persistence_manager.get_svn_commit(next_revnum)

  repos.finish()
# The conversion passes in execution order.  Pass number N (1-based,
# as given to the -p option) is _passes[N - 1].
_passes = [
  pass1,
  pass2,
  pass3,
  pass4,
  pass5,
  pass6,
  pass7,
  pass8,
  ]
class Ctx:
  """Holds the state of one cvs2svn.py session, e.g. the run-time
  options.  This class is a Borg: all instances share one attribute
  dictionary."""
  __shared_state = { }
  def __init__(self):
    self.__dict__ = self.__shared_state
    if not self.__dict__:
      # Very first instantiation: fill in the defaults.
      self.__dict__.update({
        'cvsroot': None,
        'target': None,
        'dumpfile': DUMPFILE,
        'verbose': 0,
        'quiet': 0,
        'prune': 1,
        'existing_svnrepos': 0,
        'dump_only': 0,
        'dry_run': 0,
        'trunk_only': 0,
        'trunk_base': "trunk",
        'tags_base': "tags",
        'branches_base': "branches",
        'encoding': "ascii",
        'mime_types_file': None,
        'mime_mapper': None,
        'set_eol_style': 0,
        'svnadmin': "svnadmin",
        'username': None,
        'print_help': 0,
        'skip_cleanup': 0,
        'cvs_revnums': 0,
        'bdb_txn_nosync': 0,
        'forced_branches': [],
        'forced_tags': [],
        'excludes': [],
        })
class MimeMapper:
  "Maps file names to MIME types, using a mime.types-style file."

  def __init__(self):
    # extension (or extension-less base name) -> MIME type
    self.mappings = { }
    # keys that were looked up but had no mapping -> 1
    self.missing_mappings = { }


  def set_mime_types_file(self, mime_types_file):
    """Load the mappings found in MIME_TYPES_FILE.  When an extension
    is mapped to two different types, a warning is written to stderr
    and the later mapping wins."""
    for line in fileinput.input(mime_types_file):
      if line.startswith("#"):
        continue

      # format of a line is something like
      # text/plain c h cpp
      fields = line.split()
      if len(fields) < 2:
        continue
      mime_type = fields[0]
      for ext in fields[1:]:
        previous = self.mappings.get(ext)
        if previous is not None and previous != mime_type:
          sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n" \
                           % (warning_prefix, ext, previous, mime_type))
        self.mappings[ext] = mime_type


  def get_type_from_filename(self, filename):
    """Return the MIME type mapped to FILENAME, or None (after
    remembering the missing key) if there is none."""
    basename, extension = os.path.splitext(os.path.basename(filename))

    # The extension includes the dot, so strip it.  When nothing is
    # left (no extension, or the name ends with a period), the base
    # name is the lookup key.  This allows mappings for files such as
    # README or Makefile.
    key = extension[1:] or basename
    try:
      return self.mappings[key]
    except KeyError:
      self.missing_mappings[key] = 1
      return None


  def print_missing_mappings(self):
    """Write one warning to stderr for every key that was looked up
    without success."""
    for ext in self.missing_mappings.keys():
      sys.stderr.write("%s: no MIME mapping for *.%s\n" % (warning_prefix, ext))
def convert(start_pass, end_pass):
  "Run passes START_PASS through END_PASS (1-based) and log timings."

  cvsroot = Ctx().cvsroot
  if not os.path.exists(cvsroot):
    sys.stderr.write(error_prefix + ': \'%s\' does not exist.\n'
                     % cvsroot)
    sys.exit(1)

  cleanup = Cleanup()
  # times[N - 1] is the start time of pass N; the end time of the last
  # pass is appended after the loop.
  times = [ None ] * (end_pass)
  for pass_number in range(start_pass, end_pass + 1):
    run_pass = _passes[pass_number - 1]
    times[pass_number - 1] = time.time()
    Log().write(LOG_QUIET, '----- pass %d -----' % pass_number)
    run_pass()
    # Dispose of items in Ctx() not intended to live past the end of the pass
    # (Identified by exactly one leading underscore)
    for attr in dir(Ctx()):
      if len(attr) <= 2:
        continue
      if attr[0] != '_' or attr[1] == '_':
        continue
      if attr[:6] == "_Ctx__":
        # Name-mangled private attribute of Ctx itself; keep it.
        continue
      delattr(Ctx(), attr)
    if not Ctx().skip_cleanup:
      cleanup.cleanup(run_pass)
  times.append(time.time())
  Log().write(LOG_QUIET, '------------------')

  for pass_number in range(start_pass, end_pass + 1):
    elapsed = int(times[pass_number] - times[pass_number - 1])
    Log().write(LOG_QUIET, 'pass %d: %d seconds' % (pass_number, elapsed))
  total = int(times[-1] - times[start_pass-1])
  Log().write(LOG_QUIET, ' total:', total, 'seconds')
3924 def usage():
3925 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
3926 % os.path.basename(sys.argv[0])
3927 print ' --help, -h print this usage message and exit with success'
3928 print ' --version print the version number'
3929 print ' -q quiet'
3930 print ' -v verbose'
3931 print ' -s PATH path for SVN repos'
3932 print ' -p START[:END] start at pass START, end at pass END of %d' % len(_passes)
3933 print ' If only START is given, run only pass START'
3934 print ' (implicitly enables --skip-cleanup)'
3935 print ' --existing-svnrepos load into existing SVN repository'
3936 print ' --dumpfile=PATH name of intermediate svn dumpfile'
3937 print ' --dry-run do not create a repository or a dumpfile;'
3938 print ' just print what would happen.'
3939 print ' --svnadmin=PATH path to the svnadmin program'
3940 print ' --trunk-only convert only trunk commits, not tags nor branches'
3941 print ' --trunk=PATH path for trunk (default: %s)' \
3942 % Ctx().trunk_base
3943 print ' --branches=PATH path for branches (default: %s)' \
3944 % Ctx().branches_base
3945 print ' --tags=PATH path for tags (default: %s)' \
3946 % Ctx().tags_base
3947 print ' --no-prune don\'t prune empty directories'
3948 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
3949 print ' --encoding=ENC encoding of log messages in CVS repos (default: %s)' \
3950 % Ctx().encoding
3951 print ' --force-branch=NAME Force NAME to be a branch.'
3952 print ' --force-tag=NAME Force NAME to be a tag.'
3953 print ' --exclude=REGEXP Exclude branches and tags matching REGEXP.'
3954 print ' --username=NAME username for cvs2svn-synthesized commits'
3955 print ' --skip-cleanup prevent the deletion of intermediate files'
3956 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
3957 print ' --cvs-revnums record CVS revision numbers as file properties'
3958 print ' --mime-types=FILE specify an apache-style mime.types file for\n' \
3959 ' setting svn:mime-type'
3960 print ' --set-eol-style automatically set svn:eol-style=native for\n' \
3961 ' text files'
3964 def main():
3965 # Convenience var, so we don't have to keep instantiating this Borg.
3966 ctx = Ctx()
3968 start_pass = 1
3969 end_pass = len(_passes)
3971 try:
3972 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
3973 [ "help", "create", "trunk=",
3974 "username=", "existing-svnrepos",
3975 "branches=", "tags=", "encoding=",
3976 "force-branch=", "force-tag=", "exclude=",
3977 "mime-types=", "set-eol-style",
3978 "trunk-only", "no-prune", "dry-run",
3979 "dump-only", "dumpfile=", "svnadmin=",
3980 "skip-cleanup", "cvs-revnums",
3981 "bdb-txn-nosync", "version"])
3982 except getopt.GetoptError, e:
3983 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
3984 usage()
3985 sys.exit(1)
3987 for opt, value in opts:
3988 if opt == '--version':
3989 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
3990 sys.exit(0)
3991 elif opt == '-p':
3992 # Don't cleanup if we're doing incrementals.
3993 ctx.skip_cleanup = 1
3994 if value.find(':') > 0:
3995 start_pass, end_pass = map(int, value.split(':'))
3996 else:
3997 end_pass = start_pass = int(value)
3998 if start_pass > len(_passes) or start_pass < 1:
3999 print '%s: illegal value (%d) for starting pass. '\
4000 'must be 1 through %d.' % (error_prefix, int(start_pass),
4001 len(_passes))
4002 sys.exit(1)
4003 if end_pass < start_pass or end_pass > len(_passes):
4004 print '%s: illegal value (%d) for ending pass. ' \
4005 'must be %d through %d.' % (error_prefix, int(end_pass),
4006 int(start_pass), len(_passes))
4007 sys.exit(1)
4008 elif (opt == '--help') or (opt == '-h'):
4009 ctx.print_help = 1
4010 elif opt == '-v':
4011 Log().log_level = LOG_VERBOSE
4012 ctx.verbose = 1
4013 elif opt == '-q':
4014 Log().log_level = LOG_QUIET
4015 ctx.quiet = 1
4016 elif opt == '-s':
4017 ctx.target = value
4018 elif opt == '--existing-svnrepos':
4019 ctx.existing_svnrepos = 1
4020 elif opt == '--dumpfile':
4021 ctx.dumpfile = value
4022 elif opt == '--svnadmin':
4023 ctx.svnadmin = value
4024 elif opt == '--trunk-only':
4025 ctx.trunk_only = 1
4026 elif opt == '--trunk':
4027 ctx.trunk_base = value
4028 elif opt == '--branches':
4029 ctx.branches_base = value
4030 elif opt == '--tags':
4031 ctx.tags_base = value
4032 elif opt == '--no-prune':
4033 ctx.prune = None
4034 elif opt == '--dump-only':
4035 ctx.dump_only = 1
4036 elif opt == '--dry-run':
4037 ctx.dry_run = 1
4038 elif opt == '--encoding':
4039 ctx.encoding = value
4040 elif opt == '--force-branch':
4041 ctx.forced_branches.append(value)
4042 elif opt == '--force-tag':
4043 ctx.forced_tags.append(value)
4044 elif opt == '--exclude':
4045 try:
4046 ctx.excludes.append(re.compile('^' + value + '$'))
4047 except re.error, e:
4048 sys.exit(error_prefix + ": '%s' is not a valid regexp.\n" % (value))
4049 elif opt == '--mime-types':
4050 ctx.mime_types_file = value
4051 elif opt == '--set-eol-style':
4052 ctx.set_eol_style = 1
4053 elif opt == '--username':
4054 ctx.username = value
4055 elif opt == '--skip-cleanup':
4056 ctx.skip_cleanup = 1
4057 elif opt == '--cvs-revnums':
4058 ctx.cvs_revnums = 1
4059 elif opt == '--bdb-txn-nosync':
4060 ctx.bdb_txn_nosync = 1
4061 elif opt == '--create':
4062 sys.stderr.write(warning_prefix +
4063 ': The behaviour produced by the --create option is now the '
4064 'default,\nand passing the option is deprecated.\n')
4066 if ctx.print_help:
4067 usage()
4068 sys.exit(0)
4070 # Consistency check for options and arguments.
4071 if len(args) == 0:
4072 usage()
4073 sys.exit(1)
4075 if len(args) > 1:
4076 sys.stderr.write(error_prefix +
4077 ": must pass only one CVS repository.\n")
4078 usage()
4079 sys.exit(1)
4081 ctx.cvsroot = args[0]
4083 if not os.path.isdir(ctx.cvsroot):
4084 sys.stderr.write(error_prefix +
4085 ": the cvs-repos-path '%s' is not an "
4086 "existing directory.\n" % ctx.cvsroot)
4087 sys.exit(1)
4089 if (not ctx.target) and (not ctx.dump_only):
4090 sys.stderr.write(error_prefix +
4091 ": must pass one of '-s' or '--dump-only'.\n")
4092 sys.exit(1)
4094 def not_both(opt1val, opt1name, opt2val, opt2name):
4095 if opt1val and opt2val:
4096 sys.stderr.write(error_prefix + ": cannot pass both '%s' and '%s'.\n" \
4097 % (opt1name, opt2name))
4098 sys.exit(1)
4100 not_both(ctx.target, '-s', ctx.dump_only, '--dump-only')
4102 not_both(ctx.dump_only, '--dump-only',
4103 ctx.existing_svnrepos, '--existing-svnrepos')
4105 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
4106 ctx.existing_svnrepos, '--existing-svnrepos')
4108 not_both(ctx.dump_only, '--dump-only',
4109 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
4111 not_both(ctx.quiet, '-q',
4112 ctx.verbose, '-v')
4114 if ((string.find(ctx.trunk_base, '/') > -1)
4115 or (string.find(ctx.tags_base, '/') > -1)
4116 or (string.find(ctx.branches_base, '/') > -1)):
4117 sys.stderr.write("%s: cannot pass multicomponent path to "
4118 "--trunk, --tags, or --branches yet.\n"
4119 " See http://cvs2svn.tigris.org/issues/show_bug.cgi?"
4120 "id=7 for details.\n" % error_prefix)
4121 sys.exit(1)
4123 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
4124 sys.stderr.write(error_prefix +
4125 ": the svn-repos-path '%s' is not an "
4126 "existing directory.\n" % ctx.target)
4127 sys.exit(1)
4129 if not ctx.dump_only and not ctx.existing_svnrepos \
4130 and os.path.exists(ctx.target):
4131 sys.stderr.write(error_prefix +
4132 ": the svn-repos-path '%s' exists.\nRemove it, or pass "
4133 "'--existing-svnrepos'.\n" % ctx.target)
4134 sys.exit(1)
4136 if ctx.mime_types_file:
4137 ctx.mime_mapper = MimeMapper()
4138 ctx.mime_mapper.set_mime_types_file(ctx.mime_types_file)
4140 # Lock the current directory for temporary files.
4141 try:
4142 os.mkdir('cvs2svn.lock')
4143 except OSError, e:
4144 if e.errno == errno.EACCES:
4145 sys.stderr.write(error_prefix + ": Permission denied:"
4146 + " No write access to output directory.\n")
4147 sys.exit(1)
4148 if e.errno == errno.EEXIST:
4149 sys.stderr.write(error_prefix +
4150 ": cvs2svn writes temporary files to the current working directory.\n"
4151 " The directory 'cvs2svn.lock' exists, indicating that another\n"
4152 " cvs2svn process is currently using the current directory for its\n"
4153 " temporary workspace. If you are certain that is not the case,\n"
4154 " remove the 'cvs2svn.lock' directory.\n")
4155 sys.exit(1)
4156 raise
4157 try:
4158 convert(start_pass, end_pass)
4159 finally:
4160 try: os.rmdir('cvs2svn.lock')
4161 except: pass
4163 if ctx.mime_types_file:
4164 ctx.mime_mapper.print_missing_mappings()
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
  main()