2 # (Be in -*- python -*- mode.)
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION
= 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
41 # Try to get access to a bunch of encodings for use with --encoding.
42 # See http://cjkpython.i18n.org/ for details.
47 # Warnings and errors start with these strings. They are typically
48 # followed by a colon and a space, as in "%s: " ==> "WARNING: ".
49 warning_prefix
= "WARNING"
50 error_prefix
= "ERROR"
52 # Make sure this Python is recent enough.
53 if sys
.hexversion
< 0x02020000:
54 sys
.stderr
.write("'%s: Python 2.2 or higher required, "
55 "see www.python.org.\n" % error_prefix
)
58 # Pretend we have true booleans on older python versions
65 # Opening pipes was a mess before Python 2.4, because some methods did
66 # not exist on some platforms, and some behaved differently on others.
67 # Python 2.4 solved this by adding the subprocess module, but since we
68 # cannot require such a new version, we cannot use it directly, but
69 # must implement a simplified Popen using the best means necessary.
71 # The SimplePopen class only has the following members and methods, all
72 # behaving as documented in the subprocess.Popen class:
78 # First try subprocess.Popen...
81 def __init__(self
, cmd
, capture_stderr
):
83 stderr
= subprocess
.PIPE
86 self
._popen
= subprocess
.Popen(cmd
, stdin
=subprocess
.PIPE
,
87 stdout
=subprocess
.PIPE
, stderr
=stderr
)
88 self
.stdin
= self
._popen
.stdin
89 self
.stdout
= self
._popen
.stdout
91 self
.stderr
= self
._popen
.stderr
92 self
.wait
= self
._popen
.wait
94 if hasattr(popen2
, 'Popen3'):
95 # ...then try popen2.Popen3...
97 def __init__(self
, cmd
, capture_stderr
):
98 self
._popen
3 = popen2
.Popen3(cmd
, capture_stderr
)
99 self
.stdin
= self
._popen
3.tochild
100 self
.stdout
= self
._popen
3.fromchild
102 self
.stderr
= self
._popen
3.childerr
103 self
.wait
= self
._popen
3.wait
105 # ...and if all fails, use popen2.popen3...
107 def __init__(self
, cmd
, capture_stderr
):
108 if type(cmd
) != types
.StringType
:
109 cmd
= argv_to_command_string(cmd
)
110 self
.stdout
, self
.stdin
, self
.stderr
= popen2
.popen3(cmd
, mode
='b')
112 return self
.stdout
.close() or self
.stdin
.close() or \
115 # DBM module selection
117 # 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
118 # so that the dbhash module used by anydbm will use bsddb3.
121 sys
.modules
['bsddb'] = sys
.modules
['bsddb3']
125 # 2. These DBM modules are not good for cvs2svn.
127 if (anydbm
._defaultmod
.__name
__ == 'dumbdbm'
128 or anydbm
._defaultmod
.__name
__ == 'dbm'):
131 + ': your installation of Python does not contain a suitable\n'
132 + 'DBM module -- cvs2svn cannot continue.\n'
133 + 'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n')
136 # 3. If we are using the old bsddb185 module, then try prefer gdbm instead.
137 # Unfortunately, gdbm appears not to be trouble free, either.
138 if hasattr(anydbm
._defaultmod
, 'bsddb') \
139 and not hasattr(anydbm
._defaultmod
.bsddb
, '__version__'):
141 gdbm
= __import__('gdbm')
143 sys
.stderr
.write(warning_prefix
+
144 ': The version of the bsddb module found '
145 'on your computer has been reported to malfunction on some datasets, '
146 'causing KeyError exceptions. You may wish to upgrade your Python to '
147 'version 2.3 or later.\n')
149 anydbm
._defaultmod
= gdbm
151 trunk_rev
= re
.compile('^[0-9]+\\.[0-9]+$')
152 branch_tag
= re
.compile('^[0-9.]+\\.0\\.[0-9]+$')
153 vendor_tag
= re
.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$')
155 SVN_KEYWORDS_VALUE
= 'Author Date Id Revision'
157 # This really only matches standard '1.1.1.*'-style vendor revisions.
158 # One could conceivably have a file whose default branch is 1.1.3 or
159 # whatever, or was that at some point in time, with vendor revisions
160 # 1.1.3.1, 1.1.3.2, etc. But with the default branch gone now (which
161 # is the only time this regexp gets used), we'd have no basis for
162 # assuming that the non-standard vendor branch had ever been the
163 # default branch anyway, so we don't want this to match them anyway.
164 vendor_revision
= re
.compile('^(1\\.1\\.1)\\.([0-9])+$')
166 # If this run's output is a repository, then (in the tmpdir) we use
167 # a dumpfile of this name for repository loads.
169 # If this run's output is a dumpfile, then this is default name of
170 # that dumpfile, but in the current directory (unless the user has
171 # specified a dumpfile path, of course, in which case it will be
172 # wherever the user said).
173 DUMPFILE
= 'cvs2svn-dump'
175 # This file appears with different suffixes at different stages of
176 # processing. CVS revisions are cleaned and sorted here, for commit
177 # grouping. See design-notes.txt for details.
178 DATAFILE
= 'cvs2svn-data'
180 # This file contains a marshalled copy of all the statistics that we
181 # gather throughout the various runs of cvs2svn. The data stored as a
182 # marshalled dictionary.
183 STATISTICS_FILE
= 'cvs2svn-statistics'
185 # This text file contains records (1 per line) that describe svn
186 # filesystem paths that are the opening and closing source revisions
187 # for copies to tags and branches. The format is as follows:
189 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
191 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
192 # SVN_REVNUM are the primary and secondary sorting criteria for
193 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
194 SYMBOL_OPENINGS_CLOSINGS
= 'cvs2svn-symbolic-names.txt'
195 # A sorted version of the above file.
196 SYMBOL_OPENINGS_CLOSINGS_SORTED
= 'cvs2svn-symbolic-names-s.txt'
198 # This file is a temporary file for storing symbolic_name -> closing
199 # CVSRevision until the end of our pass where we can look up the
200 # corresponding SVNRevNum for the closing revs and write these out to
201 # the SYMBOL_OPENINGS_CLOSINGS.
202 SYMBOL_CLOSINGS_TMP
= 'cvs2svn-symbolic-names-closings-tmp.txt'
204 # Skeleton version of an svn filesystem.
205 # (These supersede and will eventually replace the two above.)
206 # See class SVNRepositoryMirror for how these work.
207 SVN_MIRROR_REVISIONS_DB
= 'cvs2svn-svn-revisions.db'
208 SVN_MIRROR_NODES_DB
= 'cvs2svn-svn-nodes.db'
210 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
211 # SYMBOL_OPENINGS_CLOSINGS_SORTED
212 SYMBOL_OFFSETS_DB
= 'cvs2svn-symbolic-name-offsets.db'
214 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
215 # the CVSRevision is the last such that is a source for those symbolic
216 # names. For example, if branch B's number is 1.3.0.2 in this CVS
217 # file, and this file's 1.3 is the latest (by date) revision among
218 # *all* CVS files that is a source for branch B, then the
219 # CVSRevision.unique_key() corresponding to this file at 1.3 would
220 # list at least B in its list.
221 SYMBOL_LAST_CVS_REVS_DB
= 'cvs2svn-symbol-last-cvs-revs.db'
223 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
224 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
225 ### the s-revs data in this database.
226 CVS_REVS_DB
= 'cvs2svn-cvs-revs.db'
228 # Lists all symbolic names that are tags. Keys are strings (symbolic
229 # names), values are ignorable.
230 TAGS_DB
= 'cvs2svn-tags.db'
232 # A list all tags. Each line consists of the tag name and the number
233 # of files in which it exists, separated by a space.
234 TAGS_LIST
= 'cvs2svn-tags.txt'
236 # A list of all branches. The file is stored as a plain text file
237 # to make it easy to look at in an editor. Each line contains the
238 # branch name, the number of files where the branch is created, the
239 # commit count, and a list of tags and branches that are defined on
240 # revisions in the branch.
241 BRANCHES_LIST
= 'cvs2svn-branches.txt'
243 # These two databases provide a bidirectional mapping between
244 # CVSRevision.unique_key()s and Subversion revision numbers.
246 # The first maps CVSRevision.unique_key() to a number; the values are
249 # The second maps a number to a list of CVSRevision.unique_key()s.
250 CVS_REVS_TO_SVN_REVNUMS
= 'cvs2svn-cvs-revs-to-svn-revnums.db'
251 SVN_REVNUMS_TO_CVS_REVS
= 'cvs2svn-svn-revnums-to-cvs-revs.db'
253 # This database maps svn_revnums to tuples of (symbolic_name, date).
255 # The svn_revnums are the revision numbers of all non-primary
256 # SVNCommits. No primary SVNCommit has a key in this database.
258 # The date is stored for all commits in this database.
260 # For commits that fill symbolic names, the symbolic_name is stored.
261 # For commits that default branch syncs, the symbolic_name is None.
262 SVN_COMMIT_NAMES_DATES
= 'cvs2svn-svn-commit-names-and-dates.db'
264 # This database maps svn_revnums of a default branch synchronization
265 # commit to the svn_revnum of the primary SVNCommit that motivated it.
267 # (NOTE: Secondary commits that fill branches and tags also have a
268 # motivating commit, but we do not record it because it is (currently)
269 # not needed for anything.)
271 # This mapping is used when generating the log message for the commit
272 # that synchronizes the default branch with trunk.
273 MOTIVATING_REVNUMS
= 'cvs2svn-svn-motivating-commit-revnums.db'
275 # How many bytes to read at a time from a pipe. 128 kiB should be
276 # large enough to be efficient without wasting too much memory.
277 PIPE_READ_SIZE
= 128 * 1024
279 # Record the default RCS branches, if any, for CVS filepaths.
281 # The keys are CVS filepaths, relative to the top of the repository
282 # and with the ",v" stripped off, so they match the cvs paths used in
283 # Commit.commit(). The values are vendor branch revisions, such as
284 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
285 # represents the highest vendor branch revision thought to have ever
286 # been head of the default branch.
288 # The reason we record a specific vendor revision, rather than a
289 # default branch number, is that there are two cases to handle:
291 # One case is simple. The RCS file lists a default branch explicitly
292 # in its header, such as '1.1.1'. In this case, we know that every
293 # revision on the vendor branch is to be treated as head of trunk at
294 # that point in time.
296 # But there's also a degenerate case. The RCS file does not currently
297 # have a default branch, yet we can deduce that for some period in the
298 # past it probably *did* have one. For example, the file has vendor
299 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
300 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
301 # case, we should record 1.1.1.96 as the last vendor revision to have
302 # been the head of the default branch.
303 DEFAULT_BRANCHES_DB
= 'cvs2svn-default-branches.db'
305 # Records the author and log message for each changeset.
306 # The keys are author+log digests, the same kind used to identify
307 # unique revisions in the .revs, etc files. Each value is a tuple
308 # of two elements: '(author logmessage)'.
309 METADATA_DB
= "cvs2svn-metadata.db"
311 # A temporary on-disk hash that maps CVSRevision unique keys to a new
312 # timestamp for that CVSRevision. These new timestamps are created in
313 # pass2, and this hash is used exclusively in pass2.
314 TWEAKED_TIMESTAMPS_DB
= "cvs2svn-fixed-timestamps.db"
316 REVS_SUFFIX
= '.revs'
317 CLEAN_REVS_SUFFIX
= '.c-revs'
318 SORTED_REVS_SUFFIX
= '.s-revs'
319 RESYNC_SUFFIX
= '.resync'
321 SVN_INVALID_REVNUM
= -1
323 COMMIT_THRESHOLD
= 5 * 60 # flush a commit if a 5 minute gap occurs
325 # Things that can happen to a file.
331 # A deltatext either does or doesn't represent some change.
332 DELTATEXT_NONEMPTY
= 'N'
333 DELTATEXT_EMPTY
= 'E'
335 DIGEST_END_IDX
= 9 + (sha
.digestsize
* 2)
337 # Constants used in SYMBOL_OPENINGS_CLOSINGS
class FatalException(Exception):
  """Exception thrown on a non-recoverable error.

  If this exception is thrown by main(), it is caught by the global
  layer of the program, its string representation is printed, and the
  program is ended with an exit code of 1."""
class FatalError(FatalException):
  """A FatalException that prepends error_prefix to the message."""

  def __init__(self, msg):
    """Use (error_prefix + ': ' + MSG + '\n') as the error message."""
    # Build the prefixed, newline-terminated message before delegating
    # to the base class constructor.
    full_message = '%s: %s\n' % (error_prefix, msg,)
    FatalException.__init__(self, full_message)
361 """Return a path to BASENAME in Ctx().tmpdir.
362 This is a convenience function to save horizontal space in source."""
363 return os
.path
.join(Ctx().tmpdir
, basename
)
365 # Since the unofficial set also includes [/\] we need to translate those
366 # into ones that don't conflict with Subversion limitations.
367 def _clean_symbolic_name(name
):
368 """Return symbolic name NAME, translating characters that Subversion
369 does not allow in a pathname."""
370 name
= name
.replace('/','++')
371 name
= name
.replace('\\','--')
374 def _path_join(*components
):
375 """Join two or more pathname COMPONENTS, inserting '/' as needed.
376 Empty component are skipped."""
377 return string
.join(filter(None, components
), '/')
379 def _path_split(path
):
380 """Split the svn pathname PATH into a pair, (HEAD, TAIL).
382 This is similar to os.path.split(), but always uses '/' as path
383 separator. PATH is an svn path, which should not start with a '/'.
384 HEAD is everything before the last slash, and TAIL is everything
385 after. If PATH ends in a slash, TAIL will be empty. If there is no
386 slash in PATH, HEAD will be empty. If PATH is empty, both HEAD and
389 pos
= path
.rfind('/')
393 return (path
[:pos
], path
[pos
+1:],)
395 def to_utf8(value
, mode
='replace'):
396 """Encode (as Unicode) VALUE, trying the encodings in Ctx.encoding
397 as valid source encodings. Raise UnicodeError on failure of all
399 ### FIXME: The 'replace' default mode should be an option,
400 ### like --encoding is.
401 for encoding
in Ctx().encoding
:
403 return unicode(value
, encoding
, mode
).encode('utf8')
405 Log().write(LOG_VERBOSE
, "Encoding '%s' failed for string '%s'"
def run_command(command):
  """Run COMMAND through os.system; raise FatalError on nonzero exit."""
  exit_status = os.system(command)
  if exit_status:
    raise FatalError('Command failed: "%s"' % (command,))
414 class CommandFailedException(Exception):
415 """Exception raised if check_command_runs() fails."""
420 def check_command_runs(cmd
, cmdname
):
421 """Check whether the command CMD can be executed without errors.
423 CMD is a list or string, as accepted by SimplePopen. CMDNAME is the
424 name of the command as it should be included in exception error
427 This function checks three things: (1) the command can be run
428 without throwing an OSError; (2) it exits with status=0; (3) it
429 doesn't output anything to stderr. If any of these conditions is
430 not met, raise a CommandFailedException describing the problem."""
433 pipe
= SimplePopen(cmd
, True)
435 raise CommandFailedException('error executing %s: %s' % (cmdname
, e
,))
438 errmsg
= pipe
.stderr
.read()
440 if status
!= 0 or errmsg
:
441 msg
= 'error executing %s: status %s' % (cmdname
, status
,)
443 msg
+= ', error output:\n%s' % (errmsg
,)
444 raise CommandFailedException(msg
)
448 """A CVS repository from which data can be extracted."""
450 def __init__(self
, cvs_repos_path
):
451 """CVS_REPOS_PATH is the top of the CVS repository (at least as
452 far as this run is concerned)."""
454 if not os
.path
.isdir(cvs_repos_path
):
455 raise FatalError("The specified CVS repository path '%s' is not an "
456 "existing directory." % cvs_repos_path
)
458 self
.cvs_repos_path
= os
.path
.normpath(cvs_repos_path
)
459 self
.cvs_prefix_re
= re
.compile(
460 r
'^' + re
.escape(self
.cvs_repos_path
)
461 + r
'(' + re
.escape(os
.sep
) + r
'|$)')
463 def get_cvs_path(self
, fname
):
464 """Return the path to FNAME relative to cvs_repos_path, with ',v' removed.
466 FNAME is a filesystem name that has to be within
467 self.cvs_repos_path. Return the filename relative to
468 self.cvs_repos_path, with ',v' striped off if present, and with
469 os.sep converted to '/'."""
471 (tail
, n
) = self
.cvs_prefix_re
.subn('', fname
, 1)
474 "get_cvs_path: '%s' is not a sub-path of '%s'"
475 % (fname
, self
.cvs_repos_path
,))
476 if tail
.endswith(',v'):
478 return string
.replace(tail
, os
.sep
, '/')
def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
  """Return a command string, and the pipe created using that
  string.

  C_REV is a CVSRevision.  If SUPPRESS_KEYWORD_SUBSTITUTION is True,
  then suppress the substitution of RCS/CVS keywords in the output.
  The pipe returns the text of that CVS Revision.

  Abstract method: concrete CVSRepository subclasses must override
  this."""
  raise NotImplementedError
488 class CVSRepositoryViaRCS(CVSRepository
):
489 """A CVSRepository accessed via RCS."""
491 def __init__(self
, cvs_repos_path
):
492 CVSRepository
.__init
__(self
, cvs_repos_path
)
494 check_command_runs([ 'co', '-V' ], 'co')
495 except CommandFailedException
, e
:
496 raise FatalError('%s\n'
497 'Please check that co is installed and in your PATH\n'
498 '(it is a part of the RCS software).' % (e
,))
def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
  """Return the RCS 'co' command (as an argv list) and a SimplePopen
  pipe running it for C_REV.

  '-p' + c_rev.rev requests the revision's text; '-kk' is appended
  when SUPPRESS_KEYWORD_SUBSTITUTION is True to suppress RCS/CVS
  keyword expansion."""
  pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
  if suppress_keyword_substitution:
    pipe_cmd.append('-kk')
  pipe_cmd.append(c_rev.rcs_path())
  pipe = SimplePopen(pipe_cmd, True)
  # NOTE(review): the child's stdin is never closed here -- confirm
  # whether a pipe.stdin.close() is needed before the caller reads.
  return pipe_cmd, pipe
510 class CVSRepositoryViaCVS(CVSRepository
):
511 """A CVSRepository accessed via CVS."""
513 def __init__(self
, cvs_repos_path
):
514 CVSRepository
.__init
__(self
, cvs_repos_path
)
515 # Ascend above the specified root if necessary, to find the
516 # cvs_repository_root (a directory containing a CVSROOT directory)
517 # and the cvs_module (the path of the conversion root within the
518 # cvs repository) NB: cvs_module must be separated by '/' *not* by
520 def is_cvs_repository_root(path
):
521 return os
.path
.isdir(os
.path
.join(path
, 'CVSROOT'))
523 self
.cvs_repository_root
= os
.path
.abspath(self
.cvs_repos_path
)
525 while not is_cvs_repository_root(self
.cvs_repository_root
):
526 # Step up one directory:
527 prev_cvs_repository_root
= self
.cvs_repository_root
528 self
.cvs_repository_root
, module_component
= \
529 os
.path
.split(self
.cvs_repository_root
)
530 if self
.cvs_repository_root
== prev_cvs_repository_root
:
531 # Hit the root (of the drive, on Windows) without finding a
534 "the path '%s' is not a CVS repository, nor a path "
535 "within a CVS repository. A CVS repository contains "
536 "a CVSROOT directory within its root directory."
537 % (self
.cvs_repos_path
,))
539 self
.cvs_module
= module_component
+ "/" + self
.cvs_module
541 os
.environ
['CVSROOT'] = self
.cvs_repository_root
543 def cvs_ok(global_arguments
):
545 [ 'cvs' ] + global_arguments
+ [ '--version' ], 'cvs')
547 self
.global_arguments
= [ "-q", "-R" ]
549 cvs_ok(self
.global_arguments
)
550 except CommandFailedException
, e
:
551 self
.global_arguments
= [ "-q" ]
553 cvs_ok(self
.global_arguments
)
554 except CommandFailedException
, e
:
557 'Please check that cvs is installed and in your PATH.' % (e
,))
559 def get_co_pipe(self
, c_rev
, suppress_keyword_substitution
=False):
560 pipe_cmd
= [ 'cvs' ] + self
.global_arguments
+ \
561 [ 'co', '-r' + c_rev
.rev
, '-p' ]
562 if suppress_keyword_substitution
:
563 pipe_cmd
.append('-kk')
564 pipe_cmd
.append(self
.cvs_module
+ c_rev
.cvs_path
)
565 pipe
= SimplePopen(pipe_cmd
, True)
567 return pipe_cmd
, pipe
570 def generate_ignores(c_rev
):
572 pipe_cmd
, pipe
= Ctx().cvs_repository
.get_co_pipe(c_rev
)
573 buf
= pipe
.stdout
.read(PIPE_READ_SIZE
)
576 raw_ignore_val
= raw_ignore_val
+ buf
577 buf
= pipe
.stdout
.read(PIPE_READ_SIZE
)
579 error_output
= pipe
.stderr
.read()
580 exit_status
= pipe
.wait()
582 raise FatalError("The command '%s' failed with exit status: %s\n"
583 "and the following output:\n"
584 "%s" % (pipe_cmd
, exit_status
, error_output
))
586 # Tweak props: First, convert any spaces to newlines...
587 raw_ignore_val
= '\n'.join(raw_ignore_val
.split())
588 raw_ignores
= raw_ignore_val
.split('\n')
590 for ignore
in raw_ignores
:
591 # Reset the list if we encounter a '!'
592 # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
599 ignore_vals
.append(ignore
)
602 # Return a string that has not been returned by gen_key() before.
606 key
= '%x' % gen_key_base
607 gen_key_base
= gen_key_base
+ 1
610 # ============================================================================
611 # This code is copied with a few modifications from:
612 # subversion/subversion/bindings/swig/python/svn/core.py
614 if sys
.platform
== "win32":
615 _escape_shell_arg_re
= re
.compile(r
'(\\+)(\"|$)')
617 def escape_shell_arg(arg
):
618 # The (very strange) parsing rules used by the C runtime library are
620 # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp
622 # double up slashes, but only if they are followed by a quote character
623 arg
= re
.sub(_escape_shell_arg_re
, r
'\1\1\2', arg
)
625 # surround by quotes and escape quotes inside
626 arg
= '"' + string
.replace(arg
, '"', '"^""') + '"'
630 def argv_to_command_string(argv
):
631 """Flatten a list of command line arguments into a command string.
633 The resulting command string is expected to be passed to the system
634 shell which os functions like popen() and system() invoke internally.
637 # According cmd's usage notes (cmd /?), it parses the command line by
638 # "seeing if the first character is a quote character and if so, stripping
639 # the leading character and removing the last quote character."
640 # So to prevent the argument string from being changed we add an extra set
641 # of quotes around it here.
642 return '"' + string
.join(map(escape_shell_arg
, argv
), " ") + '"'
645 def escape_shell_arg(str):
646 return "'" + string
.replace(str, "'", "'\\''") + "'"
648 def argv_to_command_string(argv
):
649 """Flatten a list of command line arguments into a command string.
651 The resulting command string is expected to be passed to the system
652 shell which os functions like popen() and system() invoke internally.
655 return string
.join(map(escape_shell_arg
, argv
), " ")
656 # ============================================================================
def format_date(date):
  """Return an svn-compatible date string for DATE (seconds since epoch).

  A Subversion date looks like "2002-09-29T14:44:59.000000Z"."""
  utc_time = time.gmtime(date)
  return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc_time)
663 def sort_file(infile
, outfile
):
666 # GNU sort will sort our dates differently (incorrectly!) if our
667 # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
669 lc_all_tmp
= os
.environ
.get('LC_ALL', None)
670 os
.environ
['LC_ALL'] = 'C'
671 # The -T option to sort has a nice side effect. The Win32 sort is
672 # case insensitive and cannot be used, and since it does not
673 # understand the -T option and dies if we try to use it, there is
674 # no risk that we use that sort by accident.
675 run_command('sort -T %s %s > %s' % (Ctx().tmpdir
, infile
, outfile
))
676 if lc_all_tmp
is None:
677 del os
.environ
['LC_ALL']
679 os
.environ
['LC_ALL'] = lc_all_tmp
681 def match_regexp_list(regexp_list
, string
):
682 """Test whether STRING matches any of the compiled regexps in
684 for regexp
in regexp_list
:
685 if regexp
.match(string
):
690 """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
692 def __init__(self
, stream
):
694 self
.carry_cr
= False
697 def read(self
, size
):
699 buf
= self
.stream
.read(size
)
700 self
.eof
= len(buf
) == 0
703 self
.carry_cr
= False
704 if not self
.eof
and buf
[-1] == '\r':
707 buf
= string
.replace(buf
, '\r\n', '\n')
708 buf
= string
.replace(buf
, '\r', '\n')
709 if len(buf
) > 0 or self
.eof
:
713 # These constants represent the log levels that this script supports
719 """A Simple logging facility. Each line will be timestamped if
720 self.use_timestamps is TRUE. This class is a Borg, see
721 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
724 self
.__dict
__ = self
.__shared
_state
727 self
.log_level
= LOG_NORMAL
728 # Set this to true if you want to see timestamps on each line output.
729 self
.use_timestamps
= None
730 self
.logger
= sys
.stdout
732 def _timestamp(self
):
733 """Output a detailed timestamp at the beginning of each line output."""
734 self
.logger
.write(time
.strftime('[%Y-%m-%d %I:%m:%S %Z] - '))
736 def write(self
, log_level
, *args
):
737 """This is the public method to use for writing to a file. Only
738 messages whose LOG_LEVEL is <= self.log_level will be printed. If
739 there are multiple ARGS, they will be separated by a space."""
740 if log_level
> self
.log_level
:
742 if self
.use_timestamps
:
744 self
.logger
.write(' '.join(map(str,args
)) + "\n")
745 # Ensure that log output doesn't get out-of-order with respect to
751 """This singleton class manages any files created by cvs2svn. When
752 you first create a file, call Cleanup.register, passing the
753 filename, and the last pass that you need the file. After the end
754 of that pass, your file will be cleaned up after running an optional
755 callback. This class is a Borg, see
756 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
760 self
.__dict
__ = self
.__shared
_state
766 def register(self
, file, which_pass
, callback
=None):
767 """Register FILE for cleanup at the end of WHICH_PASS, running
768 function CALLBACK prior to removal. Registering a given FILE is
769 idempotent; you may register as many times as you wish, but it
770 will only be cleaned up once.
772 Note that if a file is registered multiple times, only the first
773 callback registered for that file will be called at cleanup
774 time. Also note that if you register a database file you must
775 close the database before cleanup, e.g. using a callback."""
776 self
._log
.setdefault(which_pass
, {})[file] = 1
777 if callback
and not self
._callbacks
.has_key(file):
778 self
._callbacks
[file] = callback
780 def cleanup(self
, which_pass
):
781 """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
782 if not self
._log
.has_key(which_pass
):
784 for file in self
._log
[which_pass
]:
785 Log().write(LOG_VERBOSE
, "Deleting", file)
786 if self
._callbacks
.has_key(file):
787 self
._callbacks
[file]()
791 # Always use these constants for opening databases.
796 class AbstractDatabase(UserDict
.DictMixin
):
797 """An abstract base class for anydbm-based databases."""
799 def __init__(self
, filename
, mode
):
800 """A convenience function for opening an anydbm database."""
801 # pybsddb3 has a bug which prevents it from working with
802 # Berkeley DB 4.2 if you open the db with 'n' ("new"). This
803 # causes the DB_TRUNCATE flag to be passed, which is disallowed
804 # for databases protected by lock and transaction support
805 # (bsddb databases use locking from bsddb version 4.2.4 onwards).
807 # Therefore, manually perform the removal (we can do this, because
808 # we know that for bsddb - but *not* anydbm in general - the database
809 # consists of one file with the name we specify, rather than several
810 # based on that name).
811 if mode
== 'n' and anydbm
._defaultmod
.__name
__ == 'dbhash':
812 if os
.path
.isfile(filename
):
816 self
.db
= anydbm
.open(filename
, mode
)
818 # Import implementations for many mapping interface methods.
819 # Note that we specifically do not do this for any method which handles
820 # *values*, because our derived classes may define __getitem__ and
821 # __setitem__ to override the storage of values, and grabbing methods
822 # directly from the dbm object would bypass this.
823 for meth_name
in ('__delitem__', 'keys',
824 '__iter__', 'has_key', '__contains__', 'iterkeys', 'clear'):
825 meth_ref
= getattr(self
.db
, meth_name
, None)
827 setattr(self
, meth_name
, meth_ref
)
829 def __delitem__(self
, key
):
830 "gdbm does not define a __delitem__ we can assign."
834 class SDatabase(AbstractDatabase
):
835 """A database that can only store strings."""
837 def __getitem__(self
, key
):
840 def __setitem__(self
, key
, value
):
class Database(AbstractDatabase):
  """A database that uses the marshal module to store built-in types."""

  def __getitem__(self, key):
    # Values are stored marshalled; decode on the way out.
    raw = self.db[key]
    return marshal.loads(raw)

  def __setitem__(self, key, value):
    # Marshal the value so arbitrary built-in types can be stored.
    encoded = marshal.dumps(value)
    self.db[key] = encoded
857 self
.__dict
__ = self
.__shared
_state
860 self
.filename
= temp(STATISTICS_FILE
)
861 Cleanup().register(self
.filename
, pass8
)
862 # This can get kinda large, so we don't store it in our data dict.
863 self
.repos_files
= { }
865 if os
.path
.exists(self
.filename
):
868 self
.data
= { 'cvs_revs_count' : 0,
872 'repos_file_count' : 0,
873 'svn_rev_count' : None,
874 'first_rev_date' : 1L<<32,
876 'pass_timings' : { },
881 def log_duration_for_pass(self
, duration
, pass_num
):
882 self
.data
['pass_timings'][pass_num
] = duration
884 def set_start_time(self
, start
):
885 self
.data
['start_time'] = start
887 def set_end_time(self
, end
):
888 self
.data
['end_time'] = end
890 def _bump_item(self
, key
, amount
=1):
891 self
.data
[key
] = self
.data
[key
] + amount
893 def reset_c_rev_info(self
):
894 self
.data
['cvs_revs_count'] = 0
895 self
.data
['tags'] = { }
896 self
.data
['branches'] = { }
898 def record_c_rev(self
, c_rev
):
899 self
._bump
_item
('cvs_revs_count')
901 for tag
in c_rev
.tags
:
902 self
.data
['tags'][tag
] = None
903 for branch
in c_rev
.branches
:
904 self
.data
['branches'][branch
] = None
906 if c_rev
.timestamp
< self
.data
['first_rev_date']:
907 self
.data
['first_rev_date'] = c_rev
.timestamp
909 if c_rev
.timestamp
> self
.data
['last_rev_date']:
910 self
.data
['last_rev_date'] = c_rev
.timestamp
912 # Only add the size if this is the first time we see the file.
913 if not self
.repos_files
.has_key(c_rev
.fname
):
914 self
._bump
_item
('repos_size', c_rev
.file_size
)
915 self
.repos_files
[c_rev
.fname
] = None
917 self
.data
['repos_file_count'] = len(self
.repos_files
)
919 def set_svn_rev_count(self
, count
):
920 self
.data
['svn_rev_count'] = count
922 def svn_rev_count(self
):
923 return self
.data
['svn_rev_count']
926 open(self
.filename
, 'w').write(marshal
.dumps(self
.data
))
929 self
.data
= marshal
.loads(open(self
.filename
, 'r').read())
933 if self
.data
['svn_rev_count'] is not None:
934 svn_revs_str
= ('Total SVN Commits: %10s\n'
935 % self
.data
['svn_rev_count'])
938 'cvs2svn Statistics:\n' \
939 '------------------\n' \
940 'Total CVS Files: %10i\n' \
941 'Total CVS Revisions: %10i\n' \
942 'Total Unique Tags: %10i\n' \
943 'Total Unique Branches: %10i\n' \
944 'CVS Repos Size in KB: %10i\n' \
946 'First Revision Date: %s\n' \
947 'Last Revision Date: %s\n' \
948 '------------------' \
949 % (self
.data
['repos_file_count'],
950 self
.data
['cvs_revs_count'],
951 len(self
.data
['tags']),
952 len(self
.data
['branches']),
953 (self
.data
['repos_size'] / 1024),
955 time
.ctime(self
.data
['first_rev_date']),
956 time
.ctime(self
.data
['last_rev_date']),
960 passes
= self
.data
['pass_timings'].keys()
962 str = 'Timings:\n------------------\n'
965 if val
== 1: return "second"
968 for pass_num
in passes
:
969 duration
= int(self
.data
['pass_timings'][pass_num
])
970 p_str
= ('pass %d:%6d %s\n'
971 % (pass_num
, duration
, desc(duration
)))
974 total
= int(self
.data
['end_time'] - self
.data
['start_time'])
975 str = str + ('total: %6d %s' % (total
, desc(total
)))
979 class LastSymbolicNameDatabase
:
980 """ Passing every CVSRevision in s-revs to this class will result in
981 a Database whose key is the last CVS Revision a symbolicname was
982 seen in, and whose value is a list of all symbolicnames that were
983 last seen in that revision."""
984 def __init__(self
, mode
):
986 self
.symbol_revs_db
= Database(temp(SYMBOL_LAST_CVS_REVS_DB
), mode
)
987 Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB
), pass5
)
989 # Once we've gone through all the revs,
990 # symbols.keys() will be a list of all tags and branches, and
991 # their corresponding values will be a key into the last CVS revision
992 # that they were used in.
def log_revision(self, c_rev):
  """Record C_REV as the most recent CVSRevision seen for each of its
  symbolic names.

  Every tag on C_REV is (re)mapped to C_REV's unique key; each branch
  rooted in C_REV is mapped too, but only when C_REV is not a delete."""
  # Gather last CVS Revision for symbolic name info and tag info
  for tag in c_rev.tags:
    self.symbols[tag] = c_rev.unique_key()
  # Compare by equality, not identity: c_rev.op may be parsed from a
  # revs-file line at runtime, so 'is not' would silently depend on
  # CPython string interning.
  if c_rev.op != OP_DELETE:
    for branch in c_rev.branches:
      self.symbols[branch] = c_rev.unique_key()
1001 # Creates an inversion of symbols above--a dictionary of lists (key
1002 # = CVS rev unique_key: val = list of symbols that close in that
1004 def create_database(self
):
1005 for sym
, rev_unique_key
in self
.symbols
.items():
1006 ary
= self
.symbol_revs_db
.get(rev_unique_key
, [])
1008 self
.symbol_revs_db
[rev_unique_key
] = ary
1011 class CVSRevisionDatabase
:
1012 """A Database to store CVSRevision objects and retrieve them by their
def __init__(self, mode):
  """Initialize an instance, opening database in MODE (like the MODE
  argument to Database or anydbm.open())."""
  # Maps CVSRevision.unique_key() -> str(CVSRevision); see
  # log_revision()/get_revision() below.
  self.cvs_revs_db = SDatabase(temp(CVS_REVS_DB), mode)
  # Register the temporary db file with Cleanup, keyed to pass8 —
  # presumably scheduling its removal once that pass is done (same
  # pattern as the other databases in this file).
  Cleanup().register(temp(CVS_REVS_DB), pass8)
def log_revision(self, c_rev):
  """Add C_REV, a CVSRevision, to the database."""
  key = c_rev.unique_key()
  self.cvs_revs_db[key] = str(c_rev)
def get_revision(self, unique_key):
  """Return the CVSRevision stored under UNIQUE_KEY."""
  serialized = self.cvs_revs_db[unique_key]
  return CVSRevision(Ctx(), serialized)
1030 def TagsDatabase(mode
):
1031 """A Database to store which symbolic names are tags.
1032 Each key is a tag name.
1033 The value has no meaning, and should be set to None."""
1034 db
= SDatabase(temp(TAGS_DB
), mode
)
1035 Cleanup().register(temp(TAGS_DB
), pass8
)
1040 """A project within a CVS repository."""
def __init__(self, project_cvs_repos_path, trunk_path, branches_path, tags_path):
  """Create a new Project record.

  PROJECT_CVS_REPOS_PATH is this project's top-level directory within
  the CVS repository filesystem.  TRUNK_PATH, BRANCHES_PATH and
  TAGS_PATH are the full, normalized svn directory names for the
  corresponding parts of the converted repository.

  Raise FatalError if PROJECT_CVS_REPOS_PATH does not lie inside the
  CVS repository configured in Ctx()."""
  self.project_cvs_repos_path = project_cvs_repos_path
  repos_prefix = Ctx().cvs_repository.cvs_repos_path
  if not project_cvs_repos_path.startswith(repos_prefix):
    raise FatalError("Project '%s' must start with '%s'"
                     % (project_cvs_repos_path, repos_prefix,))
  # Strip the repository prefix (and a leading path separator, if
  # present) to get the project's main directory as a cvs_path.
  cvs_path = project_cvs_repos_path[len(repos_prefix):]
  if cvs_path.startswith(os.sep):
    cvs_path = cvs_path[1:]
  self.project_cvs_path = cvs_path
  self.trunk_path = trunk_path
  self.branches_path = branches_path
  self.tags_path = tags_path
  verify_paths_disjoint(trunk_path, branches_path, tags_path)
1065 def is_source(self
, svn_path
):
1066 """Return True iff SVN_PATH is a legitimate source for this project.
1068 Legitimate paths are self.trunk_path or any directory directly
1069 under self.branches_path."""
1071 if svn_path
== self
.trunk_path
:
1074 (head
, tail
,) = _path_split(svn_path
)
1075 if head
== self
.branches_path
:
def is_unremovable(self, svn_path):
  """Return True iff SVN_PATH is one of this project's root paths
  (trunk, branches or tags) and therefore must never be removed."""
  protected_roots = (self.trunk_path, self.branches_path, self.tags_path)
  return svn_path in protected_roots
def get_branch_path(self, branch_name):
  """Return the svn path of the branch named BRANCH_NAME."""
  cleaned_name = _clean_symbolic_name(branch_name)
  return _path_join(self.branches_path, cleaned_name)
def get_tag_path(self, tag_name):
  """Return the svn path of the tag named TAG_NAME."""
  cleaned_name = _clean_symbolic_name(tag_name)
  return _path_join(self.tags_path, cleaned_name)
1095 def _relative_name(self
, cvs_path
):
1096 """Convert CVS_PATH into a name relative to this project's root directory.
1098 CVS_PATH has to begin (textually) with self.project_cvs_path.
1099 Remove prefix and optional '/'."""
1101 if not cvs_path
.startswith(self
.project_cvs_path
):
1103 "_relative_name: '%s' is not a sub-path of '%s'"
1104 % (cvs_path
, self
.project_cvs_path
,))
1105 l
= len(self
.project_cvs_path
)
1106 if cvs_path
[l
] == os
.sep
:
def make_trunk_path(self, cvs_path):
  """Return the svn path of the file CVS_PATH on trunk."""
  relative = self._relative_name(cvs_path)
  return _path_join(self.trunk_path, relative)
def make_branch_path(self, branch_name, cvs_path):
  """Return the svn path for CVS_PATH on branch BRANCH_NAME."""
  branch_root = self.get_branch_path(branch_name)
  return _path_join(branch_root, self._relative_name(cvs_path))
1125 def __init__(self
, ctx
, *args
):
1126 """Initialize a new CVSRevision with Ctx object CTX, and ARGS.
1128 If CTX is None, the following members and methods of the
1129 instantiated CVSRevision class object will be unavailable (or
1130 simply will not work correctly, if at all):
1133 is_default_branch_revision()
1135 (Note that this class treats CTX as const, because the caller
1136 likely passed in a Borg instance of a Ctx. The reason this class
1137 takes CTX as as a parameter, instead of just instantiating a Ctx
1138 itself, is that this class should be usable outside cvs2svn.)
1140 If there is one argument in ARGS, it is a string, in the format of
1141 a line from a revs file. Do *not* include a trailing newline.
1143 If there are multiple ARGS, there must be 17 of them,
1144 comprising a parsed revs line:
1145 timestamp --> (int) date stamp for this cvs revision
1146 digest --> (string) digest of author+logmsg
1147 prev_timestamp --> (int) date stamp for the previous cvs revision
1148 next_timestamp --> (int) date stamp for the next cvs revision
1149 op --> (char) OP_ADD, OP_CHANGE, or OP_DELETE
1150 prev_rev --> (string or None) previous CVS rev, e.g., "1.2"
1151 rev --> (string) this CVS rev, e.g., "1.3"
1152 next_rev --> (string or None) next CVS rev, e.g., "1.4"
1153 file_in_attic --> (char or None) true if RCS file is in Attic
1154 file_executable --> (char or None) true if RCS file has exec bit set.
1155 file_size --> (int) size of the RCS file
1156 deltatext_code --> (char) 'N' if non-empty deltatext, else 'E'
1157 fname --> (string) relative path of file in CVS repos
1158 mode --> (string or None) "kkv", "kb", etc.
1159 branch_name --> (string or None) branch on which this rev occurred
1160 tags --> (list of strings) all tags on this revision
1161 branches --> (list of strings) all branches rooted in this rev
1163 The two forms of initialization are equivalent.
1165 WARNING: Due to the resync process in pass2, prev_timestamp or
1166 next_timestamp may be incorrect in the c-revs or s-revs files."""
1170 (self
.timestamp
, self
.digest
, self
.prev_timestamp
, self
.next_timestamp
,
1171 self
.op
, self
.prev_rev
, self
.rev
, self
.next_rev
, self
.file_in_attic
,
1172 self
.file_executable
, self
.file_size
, self
.deltatext_code
,
1174 self
.mode
, self
.branch_name
, self
.tags
, self
.branches
) = args
1175 elif len(args
) == 1:
1176 data
= args
[0].split(' ', 15)
1177 (self
.timestamp
, self
.digest
, self
.prev_timestamp
, self
.next_timestamp
,
1178 self
.op
, self
.prev_rev
, self
.rev
, self
.next_rev
, self
.file_in_attic
,
1179 self
.file_executable
, self
.file_size
, self
.deltatext_code
,
1180 self
.mode
, self
.branch_name
, numtags
, remainder
) = data
1181 # Patch up data items which are not simple strings
1182 self
.timestamp
= int(self
.timestamp
, 16)
1183 if self
.prev_timestamp
== "*":
1184 self
.prev_timestamp
= 0
1186 self
.prev_timestamp
= int(self
.prev_timestamp
)
1187 if self
.next_timestamp
== "*":
1188 self
.next_timestamp
= 0
1190 self
.next_timestamp
= int(self
.next_timestamp
)
1191 if self
.prev_rev
== "*":
1192 self
.prev_rev
= None
1193 if self
.next_rev
== "*":
1194 self
.next_rev
= None
1195 if self
.file_in_attic
== "*":
1196 self
.file_in_attic
= None
1197 if self
.file_executable
== "*":
1198 self
.file_executable
= None
1199 self
.file_size
= int(self
.file_size
)
1200 if self
.mode
== "*":
1202 if self
.branch_name
== "*":
1203 self
.branch_name
= None
1204 numtags
= int(numtags
)
1205 tags_and_numbranches_and_remainder
= remainder
.split(' ', numtags
+ 1)
1206 self
.tags
= tags_and_numbranches_and_remainder
[:-2]
1207 numbranches
= int(tags_and_numbranches_and_remainder
[-2])
1208 remainder
= tags_and_numbranches_and_remainder
[-1]
1209 branches_and_fname
= remainder
.split(' ', numbranches
)
1210 self
.branches
= branches_and_fname
[:-1]
1211 self
.fname
= branches_and_fname
[-1]
1213 raise TypeError, 'CVSRevision() takes 2 or 18 arguments (%d given)' % \
1216 self
.cvs_path
= ctx
.cvs_repository
.get_cvs_path(self
.fname
)
1217 if self
.branch_name
:
1218 self
.svn_path
= ctx
.project
.make_branch_path(self
.branch_name
,
1221 self
.svn_path
= ctx
.project
.make_trunk_path(self
.cvs_path
)
1223 # The 'primary key' of a CVS Revision is the revision number + the
1224 # filename. To provide a unique key (say, for a dict), we just glom
1225 # them together in a string. By passing in self.prev_rev or
1226 # self.next_rev, you can get the unique key for their respective
1228 def unique_key(self
, revnum
="0"):
1231 elif revnum
is None:
1233 return revnum
+ "/" + self
.fname
1236 return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
1237 % (self
.timestamp
, self
.digest
, self
.prev_timestamp
or "*",
1238 self
.next_timestamp
or "*", self
.op
, (self
.prev_rev
or "*"),
1239 self
.rev
, (self
.next_rev
or "*"), (self
.file_in_attic
or "*"),
1240 (self
.file_executable
or "*"),
1242 self
.deltatext_code
, (self
.mode
or "*"),
1243 (self
.branch_name
or "*"),
1244 len(self
.tags
), self
.tags
and " " or "", " ".join(self
.tags
),
1245 len(self
.branches
), self
.branches
and " " or "",
1246 " ".join(self
.branches
),
1249 # Returns true if this CVSRevision is the opening CVSRevision for
1250 # NAME (for this RCS file).
1251 def opens_symbolic_name(self
, name
):
1252 if name
in self
.tags
:
1254 if name
in self
.branches
:
1255 # If this c_rev opens a branch and our op is OP_DELETE, then
1256 # that means that the file that this c_rev belongs to was
1257 # created on the branch, so for all intents and purposes, this
1258 # c_rev is *technically* not an opening. See Issue #62 for more
1260 if self
.op
!= OP_DELETE
:
1264 def is_default_branch_revision(self
):
1265 """Return 1 if SELF.rev of SELF.cvs_path is a default branch
1266 revision according to DEFAULT_BRANCHES_DB (see the conditions
1267 documented there), else return None."""
1268 val
= self
._ctx
._default
_branches
_db
.get(self
.cvs_path
, None)
1270 val_last_dot
= val
.rindex(".")
1271 our_last_dot
= self
.rev
.rindex(".")
1272 default_branch
= val
[:val_last_dot
]
1273 our_branch
= self
.rev
[:our_last_dot
]
1274 default_rev_component
= int(val
[val_last_dot
+ 1:])
1275 our_rev_component
= int(self
.rev
[our_last_dot
+ 1:])
1276 if (default_branch
== our_branch
1277 and our_rev_component
<= default_rev_component
):
1283 """Returns the actual filesystem path to the RCS file of this
1285 if self
.file_in_attic
is None:
1288 basepath
, filename
= os
.path
.split(self
.fname
)
1289 return os
.path
.join(basepath
, 'Attic', filename
)
1292 "Return the last path component of self.fname, minus the ',v'"
1293 return os
.path
.split(self
.fname
)[-1][:-2]
1295 class SymbolDatabase
:
1296 """This database records information on all symbols in the RCS
1297 files. It is created in pass 1 and it is used in pass 2."""
1299 # A hash that maps tag names to commit counts
1301 # A hash that maps branch names to lists of the format
1302 # [ create_count, commit_count, blockers ], where blockers
1303 # is a hash that lists the symbols that depend on the
1304 # the branch. The blockers hash is used as a set, so the
1305 # values are not used.
def register_tag_creation(self, name):
  """Register the creation of the tag NAME."""
  seen_so_far = self.tags.get(name, 0)
  self.tags[name] = seen_so_far + 1
1312 def _branch(self
, name
):
1313 """Helper function to get a branch node that will create and
1314 initialize the node if it does not exist."""
1315 if not self
.branches
.has_key(name
):
1316 self
.branches
[name
] = [ 0, 0, { } ]
1317 return self
.branches
[name
]
def register_branch_creation(self, name):
  """Register the creation of the branch NAME."""
  node = self._branch(name)
  node[0] = node[0] + 1
def register_branch_commit(self, name):
  """Register a commit on the branch NAME."""
  node = self._branch(name)
  node[1] = node[1] + 1
def register_branch_blocker(self, name, blocker):
  """Register BLOCKER as a blocker on the branch NAME."""
  blockers = self._branch(name)[2]
  blockers[blocker] = None
def branch_has_commit(self, name):
  """Return non-zero if NAME has commits.  Returns 0 if name
  is not a branch or if it has no commits."""
  # 'in' replaces the deprecated dict.has_key(); the short-circuit
  # 'and' preserves the falsy return for unknown branch names.
  return name in self.branches and self.branches[name][1]
1336 def find_excluded_symbols(self
, regexp_list
):
1337 """Returns a hash of all symbols thaht match the regexps in
1338 REGEXP_LISTE. The hash is used as a set so the values are
1341 for tag
in self
.tags
:
1342 if match_regexp_list(regexp_list
, tag
):
1343 excludes
[tag
] = None
1344 for branch
in self
.branches
:
1345 if match_regexp_list(regexp_list
, branch
):
1346 excludes
[branch
] = None
1349 def find_branch_exclude_blockers(self
, branch
, excludes
):
1350 """Find all blockers of BRANCH, excluding the ones in the hash
1353 if excludes
.has_key(branch
):
1354 for blocker
in self
.branches
[branch
][2]:
1355 if not excludes
.has_key(blocker
):
1356 blockers
[blocker
] = None
1359 def find_blocked_excludes(self
, excludes
):
1360 """Find all branches not in EXCLUDES that have blocking symbols that
1361 are not themselves excluded. Return a hash that maps branch names
1362 to a hash of blockers. The hash of blockes is used as a set so the
1363 values are not used."""
1364 blocked_branches
= { }
1365 for branch
in self
.branches
:
1366 blockers
= self
.find_branch_exclude_blockers(branch
, excludes
)
1368 blocked_branches
[branch
] = blockers
1369 return blocked_branches
1371 def find_mismatches(self
, excludes
=None):
1372 """Find all symbols that are defined as both tags and branches,
1373 excluding the ones in EXCLUDES. Returns a list of 4-tuples with
1374 the symbol name, tag count, branch count and commit count."""
1375 if excludes
is None:
1378 for branch
in self
.branches
:
1379 if not excludes
.has_key(branch
) and self
.tags
.has_key(branch
):
1380 mismatches
.append((branch
, # name
1381 self
.tags
[branch
], # tag count
1382 self
.branches
[branch
][0], # branch count
1383 self
.branches
[branch
][1])) # commit count
1387 """Read the symbol database from files."""
1388 f
= open(temp(TAGS_LIST
))
1393 tag
, count
= line
.split()
1394 self
.tags
[tag
] = int(count
)
1396 f
= open(temp(BRANCHES_LIST
))
1401 words
= line
.split()
1402 self
.branches
[words
[0]] = [ int(words
[1]), int(words
[2]), { } ]
1403 for blocker
in words
[3:]:
1404 self
.branches
[words
[0]][2][blocker
] = None
1407 """Store the symbol database to files."""
1408 f
= open(temp(TAGS_LIST
), "w")
1409 Cleanup().register(temp(TAGS_LIST
), pass2
)
1410 for tag
, count
in self
.tags
.items():
1411 f
.write("%s %d\n" % (tag
, count
))
1413 f
= open(temp(BRANCHES_LIST
), "w")
1414 Cleanup().register(temp(BRANCHES_LIST
), pass2
)
1415 for branch
, info
in self
.branches
.items():
1416 f
.write("%s %d %d" % (branch
, info
[0], info
[1]))
1419 f
.write(" ".join(info
[2].keys()))
1422 class CollectData(cvs2svn_rcsparse
.Sink
):
1424 self
.revs
= open(temp(DATAFILE
+ REVS_SUFFIX
), 'w')
1425 Cleanup().register(temp(DATAFILE
+ REVS_SUFFIX
), pass2
)
1426 self
.resync
= open(temp(DATAFILE
+ RESYNC_SUFFIX
), 'w')
1427 Cleanup().register(temp(DATAFILE
+ RESYNC_SUFFIX
), pass2
)
1428 self
.default_branches_db
= SDatabase(temp(DEFAULT_BRANCHES_DB
),
1430 Cleanup().register(temp(DEFAULT_BRANCHES_DB
), pass5
)
1431 self
.metadata_db
= Database(temp(METADATA_DB
), DB_OPEN_NEW
)
1432 Cleanup().register(temp(METADATA_DB
), pass8
)
1433 self
.fatal_errors
= []
1435 self
.symbol_db
= SymbolDatabase()
1437 # 1 if we've collected data for at least one file, None otherwise.
1438 self
.found_valid_file
= None
1440 # See set_fname() for initializations of other variables.
1442 def set_fname(self
, canonical_name
, filename
):
1443 """Prepare to receive data for FILENAME. FILENAME is the absolute
1444 filesystem path to the file in question, and CANONICAL_NAME is
1445 FILENAME with the 'Attic' component removed (if the file is indeed
1447 self
.fname
= canonical_name
1449 # We calculate and save some file metadata here, where we can do
1450 # it only once per file, instead of waiting until later where we
1451 # would have to do the same calculations once per CVS *revision*.
1453 self
.cvs_path
= Ctx().cvs_repository
.get_cvs_path(self
.fname
)
1455 # If the paths are not the same, then that means that the
1456 # canonical_name has had the 'Attic' component stripped out.
1457 self
.file_in_attic
= None
1458 if canonical_name
!= filename
:
1459 self
.file_in_attic
= 1
1461 file_stat
= os
.stat(filename
)
1462 # The size of our file in bytes
1463 self
.file_size
= file_stat
[stat
.ST_SIZE
]
1465 # Whether or not the executable bit is set.
1466 self
.file_executable
= None
1467 if file_stat
[0] & stat
.S_IXUSR
:
1468 self
.file_executable
= 1
1470 # revision -> [timestamp, author, old-timestamp]
1473 # Maps revision number (key) to the revision number of the
1474 # previous revision along this line of development.
1476 # For the first revision R on a branch, we consider the revision
1477 # from which R sprouted to be the 'previous'.
1479 # Note that this revision can't be determined arithmetically (due
1480 # to cvsadmin -o, which is why this is necessary).
1482 # If the key has no previous revision, then store None as key's
1486 # This dict is essentially self.prev_rev with the values mapped in
1487 # the other direction, so following key -> value will yield you
1488 # the next revision number.
1490 # Unlike self.prev_rev, if the key has no next revision, then the
1491 # key is not present.
1494 # Track the state of each revision so that in set_revision_info,
1495 # we can determine if our op is an add/change/delete. We can do
1496 # this because in set_revision_info, we'll have all of the
1497 # revisions for a file at our fingertips, and we need to examine
1498 # the state of our prev_rev to determine if we're an add or a
1499 # change--without the state of the prev_rev, we are unable to
1500 # distinguish between an add and a change.
1501 self
.rev_state
= { }
1503 # Hash mapping branch numbers, like '1.7.2', to branch names,
1504 # like 'Release_1_0_dev'.
1505 self
.branch_names
= { }
1507 # RCS flags (used for keyword expansion).
1510 # Hash mapping revision numbers, like '1.7', to lists of names
1511 # indicating which branches sprout from that revision, like
1512 # ['Release_1_0_dev', 'experimental_driver', ...].
1513 self
.branchlist
= { }
1515 # Like self.branchlist, but the values are lists of tag names that
1516 # apply to the key revision.
1519 # If set, this is an RCS branch number -- rcsparse calls this the
1520 # "principal branch", but CVS and RCS refer to it as the "default
1521 # branch", so that's what we call it, even though the rcsparse API
1522 # setter method is still 'set_principal_branch'.
1523 self
.default_branch
= None
1525 # If the RCS file doesn't have a default branch anymore, but does
1526 # have vendor revisions, then we make an educated guess that those
1527 # revisions *were* the head of the default branch up until the
1528 # commit of 1.2, at which point the file's default branch became
1529 # trunk. This records the date at which 1.2 was committed.
1530 self
.first_non_vendor_revision_date
= None
1532 # A list of all symbols defined for the current file. Used to
1533 # prevent multiple definitions of a symbol, something which can
1534 # easily happen when --symbol-transform is used.
1535 self
.defined_symbols
= { }
def set_principal_branch(self, branch):
  """rcsparse Sink callback: record BRANCH as this RCS file's default
  branch.

  (rcsparse calls it the "principal branch", but CVS and RCS call it
  the "default branch" -- see the comment where self.default_branch
  is initialized.)"""
  self.default_branch = branch
1540 def set_expansion(self
, mode
):
1543 def set_branch_name(self
, branch_number
, name
):
1544 """Record that BRANCH_NUMBER is the branch number for branch NAME,
1545 and that NAME sprouts from BRANCH_NUMBER .
1546 BRANCH_NUMBER is an RCS branch number with an odd number of components,
1547 for example '1.7.2' (never '1.7.0.2')."""
1548 if not self
.branch_names
.has_key(branch_number
):
1549 self
.branch_names
[branch_number
] = name
1550 # The branchlist is keyed on the revision number from which the
1551 # branch sprouts, so strip off the odd final component.
1552 sprout_rev
= branch_number
[:branch_number
.rfind(".")]
1553 self
.branchlist
.setdefault(sprout_rev
, []).append(name
)
1554 self
.symbol_db
.register_branch_creation(name
)
1556 sys
.stderr
.write("%s: in '%s':\n"
1557 " branch '%s' already has name '%s',\n"
1558 " cannot also have name '%s', ignoring the latter\n"
1559 % (warning_prefix
, self
.fname
, branch_number
,
1560 self
.branch_names
[branch_number
], name
))
1562 def rev_to_branch_name(self
, revision
):
1563 """Return the name of the branch on which REVISION lies.
1564 REVISION is a non-branch revision number with an even number of,
1565 components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
1566 For the convenience of callers, REVISION can also be a trunk
1567 revision such as '1.2', in which case just return None."""
1568 if trunk_rev
.match(revision
):
1570 return self
.branch_names
.get(revision
[:revision
.rindex(".")])
def add_cvs_branch(self, revision, branch_name):
  """Record that BRANCH_NAME sprouts from the revision implied by
  REVISION.

  REVISION is a CVS branch number with an even number of components
  whose second-to-last component is '0' (e.g. '1.7.0.2').  Dropping
  the magic '0' gives the real branch number ('1.7.2'), rooted at the
  prefix before it ('1.7'); pass that on to set_branch_name()."""
  # '1.7.0.2' -> '1.7' + '.2' -> '1.7.2'
  final_dot = revision.rfind(".")
  without_last = revision[:final_dot]
  penultimate_dot = without_last.rfind(".")
  real_branch_number = without_last[:penultimate_dot] + revision[final_dot:]
  self.set_branch_name(real_branch_number, branch_name)
1584 def define_tag(self
, name
, revision
):
1585 """Record a bidirectional mapping between symbolic NAME and REVISION.
1586 REVISION is an unprocessed revision number from the RCS file's
1587 header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
1588 This function will determine what kind of symbolic name it is by
1589 inspection, and record it in the right places."""
1590 for (pattern
, replacement
) in Ctx().symbol_transforms
:
1591 newname
= pattern
.sub(replacement
, name
)
1593 Log().write(LOG_WARN
, " symbol '%s' transformed to '%s'"
1596 if self
.defined_symbols
.has_key(name
):
1597 err
= "%s: Multiple definitions of the symbol '%s' in '%s'" \
1598 % (error_prefix
, name
, self
.fname
)
1599 sys
.stderr
.write(err
+ "\n")
1600 self
.fatal_errors
.append(err
)
1601 self
.defined_symbols
[name
] = None
1602 if branch_tag
.match(revision
):
1603 self
.add_cvs_branch(revision
, name
)
1604 elif vendor_tag
.match(revision
):
1605 self
.set_branch_name(revision
, name
)
1607 self
.taglist
.setdefault(revision
, []).append(name
)
1608 self
.symbol_db
.register_tag_creation(name
)
1610 def define_revision(self
, revision
, timestamp
, author
, state
,
1613 # Record the state of our revision for later calculations
1614 self
.rev_state
[revision
] = state
1616 # store the rev_data as a list in case we have to jigger the timestamp
1617 self
.rev_data
[revision
] = [int(timestamp
), author
, None]
1619 # When on trunk, the RCS 'next' revision number points to what
1620 # humans might consider to be the 'previous' revision number. For
1621 # example, 1.3's RCS 'next' is 1.2.
1623 # However, on a branch, the RCS 'next' revision number really does
1624 # point to what humans would consider to be the 'next' revision
1625 # number. For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
1627 # In other words, in RCS, 'next' always means "where to find the next
1628 # deltatext that you need this revision to retrieve.
1630 # That said, we don't *want* RCS's behavior here, so we determine
1631 # whether we're on trunk or a branch and set self.prev_rev
1634 # One last thing. Note that if REVISION is a branch revision,
1635 # instead of mapping REVISION to NEXT, we instead map NEXT to
1636 # REVISION. Since we loop over all revisions in the file before
1637 # doing anything with the data we gather here, this 'reverse
1638 # assignment' effectively does the following:
1640 # 1. Gives us no 'prev' value for REVISION (in this
1641 # iteration... it may have been set in a previous iteration)
1643 # 2. Sets the 'prev' value for the revision with number NEXT to
1644 # REVISION. So when we come around to the branch revision whose
1645 # revision value is NEXT, its 'prev' and 'prev_rev' are already
1647 if trunk_rev
.match(revision
):
1648 self
.prev_rev
[revision
] = next
1649 self
.next_rev
[next
] = revision
1651 self
.prev_rev
[next
] = revision
1652 self
.next_rev
[revision
] = next
1655 self
.prev_rev
[b
] = revision
1657 # Ratchet up the highest vendor head revision, if necessary.
1658 if self
.default_branch
:
1659 default_branch_root
= self
.default_branch
+ "."
1660 if ((revision
.find(default_branch_root
) == 0)
1661 and (default_branch_root
.count('.') == revision
.count('.'))):
1662 # This revision is on the default branch, so record that it is
1663 # the new highest default branch head revision.
1664 self
.default_branches_db
[self
.cvs_path
] = revision
1666 # No default branch, so make an educated guess.
1667 if revision
== '1.2':
1668 # This is probably the time when the file stopped having a
1669 # default branch, so make a note of it.
1670 self
.first_non_vendor_revision_date
= timestamp
1672 m
= vendor_revision
.match(revision
)
1673 if m
and ((not self
.first_non_vendor_revision_date
)
1674 or (timestamp
< self
.first_non_vendor_revision_date
)):
1675 # We're looking at a vendor revision, and it wasn't
1676 # committed after this file lost its default branch, so bump
1677 # the maximum trunk vendor revision in the permanent record.
1678 self
.default_branches_db
[self
.cvs_path
] = revision
1680 if not trunk_rev
.match(revision
):
1681 # Check for unlabeled branches, record them. We tried to collect
1682 # all branch names when we parsed the symbolic name header
1683 # earlier, of course, but that didn't catch unlabeled branches.
1684 # If a branch is unlabeled, this is our first encounter with it,
1685 # so we have to record its data now.
1686 branch_number
= revision
[:revision
.rindex(".")]
1687 if not self
.branch_names
.has_key(branch_number
):
1688 branch_name
= "unlabeled-" + branch_number
1689 self
.set_branch_name(branch_number
, branch_name
)
1691 # Register the commit on this non-trunk branch
1692 branch_name
= self
.branch_names
[branch_number
]
1693 self
.symbol_db
.register_branch_commit(branch_name
)
1695 def tree_completed(self
):
1696 "The revision tree has been parsed. Analyze it for consistency."
1698 # Our algorithm depends upon the timestamps on the revisions occurring
1699 # monotonically over time. That is, we want to see rev 1.34 occur in
1700 # time before rev 1.35. If we inserted 1.35 *first* (due to the time-
1701 # sorting), and then tried to insert 1.34, we'd be screwed.
1703 # to perform the analysis, we'll simply visit all of the 'previous'
1704 # links that we have recorded and validate that the timestamp on the
1705 # previous revision is before the specified revision
1707 # if we have to resync some nodes, then we restart the scan. just keep
1708 # looping as long as we need to restart.
1710 for current
, prev
in self
.prev_rev
.items():
1712 # no previous revision exists (i.e. the initial revision)
1714 t_c
= self
.rev_data
[current
][0]
1715 t_p
= self
.rev_data
[prev
][0]
1717 # the previous revision occurred later than the current revision.
1718 # shove the previous revision back in time (and any before it that
1719 # may need to shift).
1721 # We sync backwards and not forwards because any given CVS
1722 # Revision has only one previous revision. However, a CVS
1723 # Revision can *be* a previous revision for many other
1724 # revisions (e.g., a revision that is the source of multiple
1725 # branches). This becomes relevant when we do the secondary
1726 # synchronization in pass 2--we can make certain that we
1727 # don't resync a revision earlier than its previous
1728 # revision, but it would be non-trivial to make sure that we
1729 # don't resync revision R *after* any revisions that have R
1730 # as a previous revision.
1732 self
.rev_data
[prev
][0] = t_c
- 1 # new timestamp
1733 self
.rev_data
[prev
][2] = t_p
# old timestamp
1734 delta
= t_c
- 1 - t_p
1735 msg
= "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
1736 % (self
.cvs_path
, prev
, time
.ctime(t_p
), delta
)
1737 Log().write(LOG_VERBOSE
, msg
)
1738 if (delta
> COMMIT_THRESHOLD
1739 or delta
< (COMMIT_THRESHOLD
* -1)):
1740 str = "%s: Significant timestamp change for '%s' (%d seconds)"
1741 Log().write(LOG_WARN
,
1742 str % (warning_prefix
, self
.cvs_path
, delta
))
1744 prev
= self
.prev_rev
[current
]
1747 t_c
= t_c
- 1 # self.rev_data[current][0]
1748 t_p
= self
.rev_data
[prev
][0]
1750 # break from the for-loop
1753 # finished the for-loop (no resyncing was performed)
1756 def set_revision_info(self
, revision
, log
, text
):
1757 timestamp
, author
, old_ts
= self
.rev_data
[revision
]
1758 digest
= sha
.new(log
+ '\0' + author
).hexdigest()
1760 # the timestamp on this revision was changed. log it for later
1761 # resynchronization of other files's revisions that occurred
1762 # for this time and log message.
1763 self
.resync
.write('%08lx %s %08lx\n' % (old_ts
, digest
, timestamp
))
1765 # "...Give back one kadam to honor the Hebrew God whose Ark this is."
1766 # -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
1768 # If revision 1.1 appears to have been created via 'cvs add'
1769 # instead of 'cvs import', then this file probably never had a
1770 # default branch, so retroactively remove its record in the
1771 # default branches db. The test is that the log message CVS uses
1772 # for 1.1 in imports is "Initial revision\n" with no period.
1773 if revision
== '1.1' and log
!= 'Initial revision\n':
1775 del self
.default_branches_db
[self
.cvs_path
]
1779 # Get the timestamps of the previous and next revisions
1780 prev_rev
= self
.prev_rev
[revision
]
1781 prev_timestamp
, ign
, ign
= self
.rev_data
.get(prev_rev
, [0, None, None])
1783 next_rev
= self
.next_rev
.get(revision
)
1784 next_timestamp
, ign
, ign
= self
.rev_data
.get(next_rev
, [0, None, None])
1786 # How to tell if a CVSRevision is an add, a change, or a deletion:
1788 # It's a delete if RCS state is 'dead'
1790 # It's an add if RCS state is 'Exp.' and
1791 # - we either have no previous revision
1793 # - we have a previous revision whose state is 'dead'
1795 # Anything else is a change.
1796 if self
.rev_state
[revision
] == 'dead':
1798 elif ((self
.prev_rev
.get(revision
, None) is None)
1799 or (self
.rev_state
[self
.prev_rev
[revision
]] == 'dead')):
1804 def is_branch_revision(rev
):
1805 """Return True if this revision is not a trunk revision,
1806 else return False."""
1807 if rev
.count('.') >= 3:
1811 def is_same_line_of_development(rev1
, rev2
):
1812 """Return True if rev1 and rev2 are on the same line of
1813 development (i.e., both on trunk, or both on the same branch);
1814 return False otherwise. Either rev1 or rev2 can be None, in
1815 which case automatically return False."""
1816 if rev1
is None or rev2
is None:
1818 if rev1
.count('.') == 1 and rev2
.count('.') == 1:
1820 if rev1
[0:rev1
.rfind('.')] == rev2
[0:rev2
.rfind('.')]:
1824 # There can be an odd situation where the tip revision of a branch
1825 # is alive, but every predecessor on the branch is in state 'dead',
1826 # yet the revision from which the branch sprouts is alive. (This
1827 # is sort of a mirror image of the more common case of adding a
1828 # file on a branch, in which the first revision on the branch is
1829 # alive while the revision from which it sprouts is dead.)
1831 # In this odd situation, we must mark the first live revision on
1832 # the branch as an OP_CHANGE instead of an OP_ADD, because it
1833 # reflects, however indirectly, a change w.r.t. the source
1834 # revision from which the branch sprouts.
1836 # This is issue #89.
1838 if is_branch_revision(revision
) and self
.rev_state
[revision
] != 'dead':
1840 prev_num
= self
.prev_rev
.get(cur_num
, None)
1841 if not cur_num
or not prev_num
:
1843 if (not is_same_line_of_development(cur_num
, prev_num
)
1844 and self
.rev_state
[cur_num
] == 'dead'
1845 and self
.rev_state
[prev_num
] != 'dead'):
1847 cur_num
= self
.prev_rev
.get(cur_num
, None)
1850 deltatext_code
= DELTATEXT_NONEMPTY
1852 deltatext_code
= DELTATEXT_EMPTY
1854 c_rev
= CVSRevision(Ctx(), timestamp
, digest
, prev_timestamp
,
1856 prev_rev
, revision
, next_rev
,
1857 self
.file_in_attic
, self
.file_executable
,
1859 deltatext_code
, self
.fname
,
1860 self
.mode
, self
.rev_to_branch_name(revision
),
1861 self
.taglist
.get(revision
, []),
1862 self
.branchlist
.get(revision
, []))
1863 self
.revs
.write(str(c_rev
) + "\n")
1864 StatsKeeper().record_c_rev(c_rev
)
1866 if not self
.metadata_db
.has_key(digest
):
1867 self
.metadata_db
[digest
] = (author
, log
)
1869 def parse_completed(self
):
1870 # Walk through all branches and tags and register them with
1871 # their parent branch in the symbol database.
1872 for revision
, symbols
in self
.taglist
.items() + self
.branchlist
.items():
1873 for symbol
in symbols
:
1874 name
= self
.rev_to_branch_name(revision
)
1875 if name
is not None:
1876 self
.symbol_db
.register_branch_blocker(name
, symbol
)
1878 self
.num_files
= self
.num_files
+ 1
1880 def write_symbol_db(self
):
1881 self
.symbol_db
.write()
class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and
  closings.

  This data will later be used to determine valid SVNRevision ranges
  from which a file can be copied when creating a branch or tag in
  Subversion.  Do this by finding "Openings" and "Closings" for each
  file copied onto a branch or tag.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  obviously sprouts from revision 1.2.  Therefore, 1.2 is the opening
  for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
  'foo.c'.  Note that there may be many revisions chronologically
  between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
  perhaps even including on branch BEE itself.  But 1.3 is the next
  revision *on the same line* as 1.2, that is why it is the closing
  revision for those symbolic names of which 1.2 is the opening.

  The reason for doing all this hullabaloo is to make branch and tag
  creation as efficient as possible by minimizing the number of copies
  and deletes per creation.  For example, revisions 1.2 and 1.3 of
  foo.c might correspond to revisions 17 and 30 in Subversion.  That
  means that when creating branch BEE, there is some motivation to do
  the copy from one of 17-30.  Now if there were another file,
  'bar.c', whose opening and closing CVSRevisions for BEE corresponded
  to revisions 24 and 39 in Subversion, we would know that the ideal
  thing would be to copy the branch from somewhere between 24 and 29,
  inclusive."""
  def __init__(self):
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
    self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
    Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

    # This keys of this dictionary are *source* cvs_paths for which
    # we've encountered an 'opening' on the default branch.  The
    # values are the (uncleaned) symbolic names that this path has
    # opened.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log any openings found in C_REV, and if C_REV.next_rev is not
    None, a closing.  The opening uses SVN_REVNUM, but the closing (if
    any) will have its revnum determined later."""
    for name in c_rev.tags + c_rev.branches:
      self._note_default_branch_opening(c_rev, name)
      if c_rev.op != OP_DELETE:
        self._log(name, svn_revnum,
                  c_rev.cvs_path, c_rev.branch_name, OPENING)

      # If our c_rev has a next_rev, then that's the closing rev for
      # this source revision.  Log it to closings for later processing
      # since we don't know the svn_revnum yet.
      if c_rev.next_rev is not None:
        self.closings.write('%s %s\n' %
                            (name, c_rev.unique_key(c_rev.next_rev)))

  def _log(self, name, svn_revnum, cvs_path, branch_name, type):
    """Write out a single line to the symbol_openings_closings file
    representing that SVN_REVNUM of SVN_PATH on BRANCH_NAME is either the
    opening or closing (TYPE) of NAME (a symbolic name).

    TYPE should only be one of the following global constants:
    OPENING or CLOSING."""
    # 8 places gives us 999,999,999 SVN revs.  That *should* be enough.
    self.symbolings.write(
      '%s %.8d %s %s %s\n'
      % (name, svn_revnum, type, branch_name or '*', cvs_path))

  def close(self):
    """Iterate through the closings file, lookup the svn_revnum for
    each closing CVSRevision, and write a proper line out to the
    symbolings file; then close both files."""
    # Use this to get the c_rev of our rev_key
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    self.closings.close()
    for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      (name, rev_key) = line.rstrip().split(" ", 1)
      svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

      c_rev = cvs_revs_db.get_revision(rev_key)
      self._log(name, svn_revnum, c_rev.cvs_path, c_rev.branch_name, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """If C_REV is a default branch revision, log C_REV.cvs_path as an
    opening for SYMBOLIC_NAME."""
    self.open_paths_with_default_branches.setdefault(
      c_rev.cvs_path, []).append(symbolic_name)

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.cvs_path, then call log each name in
    self.open_paths_with_default_branches[C_REV.cvs_path] as a closing
    with SVN_REVNUM as the closing revision number."""
    path = c_rev.cvs_path
    if self.open_paths_with_default_branches.has_key(path):
      # log each symbol as a closing
      for name in self.open_paths_with_default_branches[path]:
        self._log(name, svn_revnum, path, None, CLOSING)
      # Remove them from the openings list as we're done with them.
      del self.open_paths_with_default_branches[path]
class PersistenceManager:
  """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.
  In 'read' mode, PersistenceManager will open existing on-disk databases
  and the set_* methods will be unavailable."""
  def __init__(self, mode):
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
    self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
    Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
    self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
    Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
    self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES), mode)
    Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)
    self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
    self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
    ###PERF kff Elsewhere there are comments about sucking the tags db
    ### into memory.  That seems like a good idea.
    if not Ctx().trunk_only:
      self.tags_db = TagsDatabase(DB_OPEN_READ)
    self.motivating_revnums = SDatabase(temp(MOTIVATING_REVNUMS), mode)
    Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)

    # "branch_name" -> svn_revnum in which branch was last filled.
    # This is used by CVSCommit._pre_commit, to prevent creating a fill
    # revision which would have nothing to do.
    self.last_filled = {}

  def get_svn_revnum(self, cvs_rev_unique_key):
    """Return the Subversion revision number in which
    CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
    is no mapping for CVS_REV_UNIQUE_KEY."""
    return int(self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM))

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None.

    This method can throw SVNCommitInternalInconsistencyError."""
    svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
    c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
    if c_rev_keys == None:
      return None

    digest = None
    for key in c_rev_keys:
      c_rev = self.cvs_revisions.get_revision(key)
      svn_commit.add_revision(c_rev)
      # Set the author and log message for this commit by using
      # CVSRevision metadata, but only if haven't done so already.
      if digest is None:
        digest = c_rev.digest
        author, log_msg = self.svn_commit_metadata[digest]
        svn_commit.set_author(author)
        svn_commit.set_log_msg(log_msg)

    # If we're doing a trunk-only conversion, we don't need to do any more
    # work.
    if Ctx().trunk_only:
      return svn_commit

    name, date = self._get_name_and_date(svn_revnum)
    if name:
      svn_commit.set_symbolic_name(name)
      svn_commit.set_date(date)
      if self.tags_db.has_key(name):
        svn_commit.is_tag = 1

    motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
    if motivating_revnum:
      svn_commit.set_motivating_revnum(int(motivating_revnum))
      svn_commit.set_date(date)

    if len(svn_commit.cvs_revs) and name:
      raise SVNCommit.SVNCommitInternalInconsistencyError(
        "An SVNCommit cannot have cvs_revisions *and* a corresponding\n"
        "symbolic name ('%s') to fill."
        % (_clean_symbolic_name(name),))

    return svn_commit

  def set_cvs_revs(self, svn_revnum, cvs_revs):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS."""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
        'Write operation attempted on read-only PersistenceManager')
    for c_rev in cvs_revs:
      Log().write(LOG_VERBOSE, " ", c_rev.unique_key())
    self.svn2cvs_db[str(svn_revnum)] = [x.unique_key() for x in cvs_revs]
    for c_rev in cvs_revs:
      self.cvs2svn_db[c_rev.unique_key()] = svn_revnum

  def set_name_and_date(self, svn_revnum, name, date):
    """Associate symbolic name NAME and DATE with SVN_REVNUM.

    NAME is allowed to be None."""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
        'Write operation attempted on read-only PersistenceManager')
    self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
    self.last_filled[name] = svn_revnum

  def _get_name_and_date(self, svn_revnum):
    """Return a tuple containing the symbolic name and date associated
    with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
    associated with it."""
    return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))

  def set_motivating_revnum(self, svn_revnum, motivating_revnum):
    """Store MOTIVATING_REVNUM as the value of SVN_REVNUM"""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
        'Write operation attempted on read-only PersistenceManager')
    self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
class CVSCommit:
  """Each instance of this class contains a number of CVS Revisions
  that correspond to one or more Subversion Commits.  After all CVS
  Revisions are added to the grouping, calling process_revisions will
  generate a Subversion Commit (or Commits) for the set of CVS
  Revisions in the grouping."""
2133 def __init__(self
, digest
, author
, log
):
2134 self
.digest
= digest
2135 self
.author
= author
2138 # Symbolic names for which the last source revision has already
2139 # been seen and for which the CVSRevisionAggregator has already
2140 # generated a fill SVNCommit. See self.process_revisions().
2141 self
.done_symbols
= [ ]
2144 # Lists of CVSRevisions
2148 # Start out with a t_min higher than any incoming time T, and a
2149 # t_max lower than any incoming T. This way the first T will
2150 # push t_min down to T, and t_max up to T, naturally (without any
2151 # special-casing), and successive times will then ratchet them
2152 # outward as appropriate.
2156 # This will be set to the SVNCommit that occurs in self._commit.
2157 self
.motivating_commit
= None
2159 # This is a list of all non-primary commits motivated by the main
2160 # commit. We gather these so that we can set their dates to the
2161 # same date as the primary commit.
2162 self
.secondary_commits
= [ ]
2164 # State for handling default branches.
2166 # Here is a tempting, but ultimately nugatory, bit of logic, which
2167 # I share with you so you may appreciate the less attractive, but
2168 # refreshingly non-nugatory, logic which follows it:
2170 # If some of the commits in this txn happened on a non-trunk
2171 # default branch, then those files will have to be copied into
2172 # trunk manually after being changed on the branch (because the
2173 # RCS "default branch" appears as head, i.e., trunk, in practice).
2174 # As long as those copies don't overwrite any trunk paths that
2175 # were also changed in this commit, then we can do the copies in
2176 # the same revision, because they won't cover changes that don't
2177 # appear anywhere/anywhen else. However, if some of the trunk dst
2178 # paths *did* change in this commit, then immediately copying the
2179 # branch changes would lose those trunk mods forever. So in this
2180 # case, we need to do at least that copy in its own revision. And
2181 # for simplicity's sake, if we're creating the new revision for
2182 # even one file, then we just do all such copies together in the
2185 # Doesn't that sound nice?
2187 # Unfortunately, Subversion doesn't support copies with sources
2188 # in the current txn. All copies must be based in committed
2189 # revisions. Therefore, we generate the above-described new
2190 # revision unconditionally.
2192 # This is a list of c_revs, and a c_rev is appended for each
2193 # default branch commit that will need to be copied to trunk (or
2194 # deleted from trunk) in some generated revision following the
2195 # "regular" revision.
2196 self
.default_branch_cvs_revisions
= [ ]
2198 def __cmp__(self
, other
):
2199 # Commits should be sorted by t_max. If both self and other have
2200 # the same t_max, break the tie using t_min, and lastly, digest
2201 return (cmp(self
.t_max
, other
.t_max
) or cmp(self
.t_min
, other
.t_min
)
2202 or cmp(self
.digest
, other
.digest
))
2204 def has_file(self
, fname
):
2205 return self
.files
.has_key(fname
)
2207 def revisions(self
):
2208 return self
.changes
+ self
.deletes
2210 def opens_symbolic_name(self
, name
):
2211 """Returns true if any CVSRevision in this commit is on a tag or a
2212 branch or is the origin of a tag or branch."""
2213 for c_rev
in self
.revisions():
2214 if c_rev
.opens_symbolic_name(name
):
2218 def add_revision(self
, c_rev
):
2219 # Record the time range of this commit.
2221 # ### ISSUE: It's possible, though unlikely, that the time range
2222 # of a commit could get gradually expanded to be arbitrarily
2223 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
2224 # problem, and anyway deciding where to break it up would be a
2225 # judgement call. For now, we just print a warning in commit() if
2227 if c_rev
.timestamp
< self
.t_min
:
2228 self
.t_min
= c_rev
.timestamp
2229 if c_rev
.timestamp
> self
.t_max
:
2230 self
.t_max
= c_rev
.timestamp
2232 if c_rev
.op
== OP_DELETE
:
2233 self
.deletes
.append(c_rev
)
2235 # OP_CHANGE or OP_ADD
2236 self
.changes
.append(c_rev
)
2238 self
.files
[c_rev
.fname
] = 1
2240 def _pre_commit(self
):
2241 """Generates any SVNCommits that must exist before the main
2244 # There may be multiple c_revs in this commit that would cause
2245 # branch B to be filled, but we only want to fill B once. On the
2246 # other hand, there might be multiple branches committed on in
2247 # this commit. Whatever the case, we should count exactly one
2248 # commit per branch, because we only fill a branch once per
2249 # CVSCommit. This list tracks which branches we've already
2251 accounted_for_sym_names
= [ ]
2253 def fill_needed(c_rev
, pm
):
2254 """Return 1 if this is the first commit on a new branch (for
2255 this file) and we need to fill the branch; else return 0
2256 (meaning that some other file's first commit on the branch has
2257 already done the fill for us).
2259 If C_REV.op is OP_ADD, only return 1 if the branch that this
2260 commit is on has no last filled revision.
2262 PM is a PersistenceManager to query.
2265 # Different '.' counts indicate that c_rev is now on a different
2266 # line of development (and may need a fill)
2267 if c_rev
.rev
.count('.') != c_rev
.prev_rev
.count('.'):
2268 svn_revnum
= pm
.get_svn_revnum(c_rev
.unique_key(c_rev
.prev_rev
))
2269 # It should be the case that when we have a file F that
2270 # is added on branch B (thus, F on trunk is in state
2271 # 'dead'), we generate an SVNCommit to fill B iff the branch
2272 # has never been filled before.
2274 # If this c_rev.op == OP_ADD, *and* the branch has never
2275 # been filled before, then fill it now. Otherwise, no need to
2277 if c_rev
.op
== OP_ADD
:
2278 if pm
.last_filled
.get(c_rev
.branch_name
, None) is None:
2280 elif c_rev
.op
== OP_CHANGE
:
2281 if svn_revnum
> pm
.last_filled
.get(c_rev
.branch_name
, 0):
2283 elif c_rev
.op
== OP_DELETE
:
2284 if pm
.last_filled
.get(c_rev
.branch_name
, None) is None:
2288 for c_rev
in self
.changes
+ self
.deletes
:
2289 # If a commit is on a branch, we must ensure that the branch
2290 # path being committed exists (in HEAD of the Subversion
2291 # repository). If it doesn't exist, we will need to fill the
2292 # branch. After the fill, the path on which we're committing
2294 if c_rev
.branch_name \
2295 and c_rev
.branch_name
not in accounted_for_sym_names \
2296 and c_rev
.branch_name
not in self
.done_symbols \
2297 and fill_needed(c_rev
, Ctx()._persistence
_manager
):
2298 svn_commit
= SVNCommit("pre-commit symbolic name '%s'"
2299 % c_rev
.branch_name
)
2300 svn_commit
.set_symbolic_name(c_rev
.branch_name
)
2301 self
.secondary_commits
.append(svn_commit
)
2302 accounted_for_sym_names
.append(c_rev
.branch_name
)
2305 """Generates the primary SVNCommit that corresponds to this
2307 # Generate an SVNCommit unconditionally. Even if the only change
2308 # in this CVSCommit is a deletion of an already-deleted file (that
2309 # is, a CVS revision in state 'dead' whose predecessor was also in
2310 # state 'dead'), the conversion will still generate a Subversion
2311 # revision containing the log message for the second dead
2312 # revision, because we don't want to lose that information.
2313 svn_commit
= SVNCommit("commit")
2314 self
.motivating_commit
= svn_commit
2316 for c_rev
in self
.changes
:
2317 svn_commit
.add_revision(c_rev
)
2318 # Only make a change if we need to. When 1.1.1.1 has an empty
2319 # deltatext, the explanation is almost always that we're looking
2320 # at an imported file whose 1.1 and 1.1.1.1 are identical. On
2321 # such imports, CVS creates an RCS file where 1.1 has the
2322 # content, and 1.1.1.1 has an empty deltatext, i.e, the same
2323 # content as 1.1. There's no reason to reflect this non-change
2324 # in the repository, so we want to do nothing in this case. (If
2325 # we were really paranoid, we could make sure 1.1's log message
2326 # is the CVS-generated "Initial revision\n", but I think the
2327 # conditions below are strict enough.)
2328 if not ((c_rev
.deltatext_code
== DELTATEXT_EMPTY
)
2329 and (c_rev
.rev
== "1.1.1.1")):
2330 if c_rev
.is_default_branch_revision():
2331 self
.default_branch_cvs_revisions
.append(c_rev
)
2333 for c_rev
in self
.deletes
:
2334 # When a file is added on a branch, CVS not only adds the file
2335 # on the branch, but generates a trunk revision (typically
2336 # 1.1) for that file in state 'dead'. We only want to add
2337 # this revision if the log message is not the standard cvs
2338 # fabricated log message.
2339 if c_rev
.prev_rev
is None:
2340 # c_rev.branches may be empty if the originating branch
2341 # has been excluded.
2342 if not c_rev
.branches
:
2344 cvs_generated_msg
= ('file %s was initially added on branch %s.\n'
2345 % (c_rev
.filename(),
2348 Ctx()._persistence
_manager
.svn_commit_metadata
[c_rev
.digest
]
2349 if log_msg
== cvs_generated_msg
:
2352 svn_commit
.add_revision(c_rev
)
2353 if c_rev
.is_default_branch_revision():
2354 self
.default_branch_cvs_revisions
.append(c_rev
)
2356 # There is a slight chance that we didn't actually register any
2357 # CVSRevisions with our SVNCommit (see loop over self.deletes
2358 # above), so if we have no CVSRevisions, we don't flush the
2359 # svn_commit to disk and roll back our revnum.
2360 if len(svn_commit
.cvs_revs
) > 0:
2363 # We will not be flushing this SVNCommit, so rollback the
2364 # SVNCommit revision counter.
2365 SVNCommit
.revnum
= SVNCommit
.revnum
- 1
2367 if not Ctx().trunk_only
:
2368 for c_rev
in self
.revisions():
2369 Ctx()._symbolings
_logger
.log_revision(c_rev
, svn_commit
.revnum
)
2371 def _post_commit(self
):
2372 """Generates any SVNCommits that we can perform now that _commit
2373 has happened. That is, handle non-trunk default branches.
2374 Sometimes an RCS file has a non-trunk default branch, so a commit
2375 on that default branch would be visible in a default CVS checkout
2376 of HEAD. If we don't copy that commit over to Subversion's trunk,
2377 then there will be no Subversion tree which corresponds to that
2378 CVS checkout. Of course, in order to copy the path over, we may
2379 first need to delete the existing trunk there. """
2381 # Only generate a commit if we have default branch revs
2382 if len(self
.default_branch_cvs_revisions
):
2383 # Generate an SVNCommit for all of our default branch c_revs.
2384 svn_commit
= SVNCommit("post-commit default branch(es)")
2385 svn_commit
.set_motivating_revnum(self
.motivating_commit
.revnum
)
2386 for c_rev
in self
.default_branch_cvs_revisions
:
2387 svn_commit
.add_revision(c_rev
)
2388 Ctx()._symbolings
_logger
.log_default_branch_closing(c_rev
,
2390 self
.secondary_commits
.append(svn_commit
)
2392 def process_revisions(self
, done_symbols
):
2393 """Process all the CVSRevisions that this instance has, creating
2394 one or more SVNCommits in the process. Generate fill SVNCommits
2395 only for symbols not in DONE_SYMBOLS (avoids unnecessary
2398 Return the primary SVNCommit that corresponds to this CVSCommit.
2399 The returned SVNCommit is the commit that motivated any other
2400 SVNCommits generated in this CVSCommit."""
2401 self
.done_symbols
= done_symbols
2402 seconds
= self
.t_max
- self
.t_min
+ 1
2404 Log().write(LOG_VERBOSE
, '-' * 60)
2405 Log().write(LOG_VERBOSE
, 'CVS Revision grouping:')
2407 Log().write(LOG_VERBOSE
, ' Start time: %s (duration: 1 second)'
2408 % time
.ctime(self
.t_max
))
2410 Log().write(LOG_VERBOSE
, ' Start time: %s' % time
.ctime(self
.t_min
))
2411 Log().write(LOG_VERBOSE
, ' End time: %s (duration: %d seconds)'
2412 % (time
.ctime(self
.t_max
), seconds
))
2414 if seconds
> COMMIT_THRESHOLD
+ 1:
2415 Log().write(LOG_WARN
, '%s: grouping spans more than %d seconds'
2416 % (warning_prefix
, COMMIT_THRESHOLD
))
2418 if Ctx().trunk_only
: # Only do the primary commit if we're trunk-only
2420 return self
.motivating_commit
2426 for svn_commit
in self
.secondary_commits
:
2427 svn_commit
.set_date(self
.motivating_commit
.get_date())
2430 return self
.motivating_commit
class SVNCommit:
  """This represents one commit to the Subversion Repository.  There
  are three types of SVNCommits:

  1. Commits one or more CVSRevisions (cannot fill a symbolic name).

  2. Creates or fills a symbolic name (cannot commit CVSRevisions).

  3. Updates trunk to reflect the contents of a particular branch
     (this is to handle RCS default branches)."""

  # The revision number to assign to the next new SVNCommit.
  # We start at 2 because SVNRepositoryMirror uses the first commit
  # to create trunk, tags, and branches.
  revnum = 2

  class SVNCommitInternalInconsistencyError(Exception):
    """Exception raised if we encounter an impossible state in the
    SVNCommit Databases."""
    pass
2454 def __init__(self
, description
="", revnum
=None, cvs_revs
=None):
2455 """Instantiate an SVNCommit. DESCRIPTION is for debugging only.
2456 If REVNUM, the SVNCommit will correspond to that revision number;
2457 and if CVS_REVS, then they must be the exact set of CVSRevisions for
2460 It is an error to pass CVS_REVS without REVNUM, but you may pass
2461 REVNUM without CVS_REVS, and then add a revision at a time by
2462 invoking add_revision()."""
2463 self
._description
= description
2465 # Revprop metadata for this commit.
2467 # These initial values are placeholders. At least the log and the
2468 # date should be different by the time these are used.
2470 # They are private because their values should be returned encoded
2471 # in UTF8, but callers aren't required to set them in UTF8.
2472 # Therefore, accessor methods are used to set them, and
2473 # self.get_revprops() is used to to get them, in dictionary form.
2474 self
._author
= Ctx().username
2475 self
._log
_msg
= "This log message means an SVNCommit was used too soon."
2476 self
._max
_date
= 0 # Latest date seen so far.
2478 self
.cvs_revs
= cvs_revs
or []
2480 self
.revnum
= revnum
2482 self
.revnum
= SVNCommit
.revnum
2483 SVNCommit
.revnum
= SVNCommit
.revnum
+ 1
2485 # The (uncleaned) symbolic name that is filled in this SVNCommit, if any.
2486 self
.symbolic_name
= None
2488 # If this commit is a default branch synchronization, this
2489 # variable represents the subversion revision number of the
2490 # *primary* commit where the default branch changes actually
2491 # happened. It is None otherwise.
2493 # It is possible for multiple synchronization commits to refer to
2494 # the same motivating commit revision number, and it is possible
2495 # for a single synchronization commit to contain CVSRevisions on
2496 # multiple different default branches.
2497 self
.motivating_revnum
= None
2499 # is_tag is true only if this commit is a fill of a symbolic name
2500 # that is a tag, None in all other cases.
2503 def set_symbolic_name(self
, symbolic_name
):
2504 "Set self.symbolic_name to SYMBOLIC_NAME."
2505 self
.symbolic_name
= symbolic_name
2507 def set_motivating_revnum(self
, revnum
):
2508 "Set self.motivating_revnum to REVNUM."
2509 self
.motivating_revnum
= revnum
2511 def set_author(self
, author
):
2512 """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
2513 This is the only way to set an SVNCommit's author."""
2514 self
._author
= author
2516 def set_log_msg(self
, msg
):
2517 """Set this SVNCommit's log message to MSG (a locally-encoded string).
2518 This is the only way to set an SVNCommit's log message."""
2521 def set_date(self
, date
):
2522 """Set this SVNCommit's date to DATE (an integer).
2523 Note that self.add_revision() updates this automatically based on
2524 a CVSRevision; so you may not need to call this at all, and even
2525 if you do, the value may be overwritten by a later call to
2526 self.add_revision()."""
2527 self
._max
_date
= date
2530 """Returns this SVNCommit's date as an integer."""
2531 return self
._max
_date
2533 def get_revprops(self
):
2534 """Return the Subversion revprops for this SVNCommit."""
2535 date
= format_date(self
._max
_date
)
2538 if self
._author
is not None:
2539 utf8_author
= to_utf8(self
._author
)
2540 utf8_log
= to_utf8(self
.get_log_msg())
2541 return { 'svn:author' : utf8_author
,
2542 'svn:log' : utf8_log
,
2544 except UnicodeError:
2545 Log().write(LOG_WARN
, '%s: problem encoding author or log message:'
2547 Log().write(LOG_WARN
, " author: '%s'" % self
._author
)
2548 Log().write(LOG_WARN
, " log: '%s'" % self
.get_log_msg().rstrip())
2549 Log().write(LOG_WARN
, " date: '%s'" % date
)
2550 Log().write(LOG_WARN
,
2551 "(subversion rev %s) Related files:" % self
.revnum
)
2552 for c_rev
in self
.cvs_revs
:
2553 Log().write(LOG_WARN
, " ", c_rev
.fname
)
2555 Log().write(LOG_WARN
, "Consider rerunning with (for example)",
2556 "'--encoding=latin1'.\n")
2557 # It's better to fall back to the original (unknown encoding) data
2558 # than to either 1) quit or 2) record nothing at all.
2559 return { 'svn:author' : self
._author
,
2560 'svn:log' : self
.get_log_msg(),
2563 def add_revision(self
, cvs_rev
):
2564 self
.cvs_revs
.append(cvs_rev
)
2565 if cvs_rev
.timestamp
> self
._max
_date
:
2566 self
._max
_date
= cvs_rev
.timestamp
2568 def _is_primary_commit(self
):
2569 """Return true if this is a primary SVNCommit, false otherwise."""
2570 return not (self
.symbolic_name
or self
.motivating_revnum
)
2573 Log().write(LOG_NORMAL
, "Creating Subversion r%d (%s)"
2574 % (self
.revnum
, self
._description
))
2575 Ctx()._persistence
_manager
.set_cvs_revs(self
.revnum
, self
.cvs_revs
)
2577 if self
.motivating_revnum
is not None:
2578 Ctx()._persistence
_manager
.set_motivating_revnum(self
.revnum
,
2579 self
.motivating_revnum
)
2581 # If we're not a primary commit, then store our date and/or our
2583 if not self
._is
_primary
_commit
():
2584 Ctx()._persistence
_manager
.set_name_and_date(
2585 self
.revnum
, self
.symbolic_name
, self
._max
_date
)
2588 """ Print a human-readable description of this SVNCommit. This
2589 description is not intended to be machine-parseable (although
2590 we're not going to stop you if you try!)"""
2592 ret
= "SVNCommit #: " + str(self
.revnum
) + "\n"
2593 if self
.symbolic_name
:
2594 ret
+= (" symbolic name: " + _clean_symbolic_name(self
.symbolic_name
)
2597 ret
+= " NO symbolic name\n"
2598 ret
+= " debug description: " + self
._description
+ "\n"
2599 ret
+= " cvs_revs:\n"
2600 for c_rev
in self
.cvs_revs
:
2601 ret
+= " " + c_rev
.unique_key() + "\n"
def get_log_msg(self):
  """Returns the actual log message for a primary commit, and the
  appropriate manufactured log message for a secondary commit."""
  if self.symbolic_name is not None:
    # Manufactured commit that fills a tag or branch.
    return self._log_msg_for_symbolic_name_commit()
  if self.motivating_revnum is not None:
    # Manufactured commit syncing a non-trunk default branch.
    return self._log_msg_for_default_branch_commit()
  # Primary commit: use the original CVS log message.
  return self._log_msg
def _log_msg_for_symbolic_name_commit(self):
  """Creates a log message for a manufactured commit that fills
  self.symbolic_name.  If self.is_tag is true, write the log message
  as though for a tag, else write it as though for a branch."""
  # 'kind' replaces the original local named 'type', which shadowed
  # the builtin.
  kind = 'branch'
  if self.is_tag:
    kind = 'tag'

  # In Python 2.2.3, we could use textwrap.fill().  Oh well :-).
  # Short names stay on the same line; names of 13+ characters are
  # pushed onto their own line.
  space_or_newline = ' '
  cleaned_symbolic_name = _clean_symbolic_name(self.symbolic_name)
  if len(cleaned_symbolic_name) >= 13:
    space_or_newline = '\n'

  return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
         % (kind, space_or_newline, cleaned_symbolic_name)
2631 def _log_msg_for_default_branch_commit(self
):
2632 """Creates a log message for a manufactured commit that
2633 synchronizes a non-trunk default branch with trunk."""
2634 msg
= 'This commit was generated by cvs2svn to compensate for ' \
2635 'changes in r%d,\n' \
2636 'which included commits to RCS files with non-trunk default ' \
2637 'branches.\n' % self
.motivating_revnum
2640 class CVSRevisionAggregator
:
2641 """This class groups CVSRevisions into CVSCommits that represent
2642 at least one SVNCommit."""
2644 self
.metadata_db
= Database(temp(METADATA_DB
), DB_OPEN_READ
)
2645 if not Ctx().trunk_only
:
2646 self
.last_revs_db
= Database(temp(SYMBOL_LAST_CVS_REVS_DB
),
2649 # A map { key : CVSCommit } of CVS commits currently being
2650 # accumulated. If the CVSCommit is still open to further
2651 # CVSRevisions, then key is CVSRevision.digest. If not (because
2652 # an inbound commit wanted to affect a file that was already
2653 # within the CVSCommit), then key is CVSRevision.digest plus some
2654 # number of appended '-'.
2655 self
.cvs_commits
= {}
2657 # A map { symbol : None } of symbolic names for which the last
2658 # source CVSRevision has already been processed but which haven't
2660 self
.pending_symbols
= {}
2662 # A list of closed symbols. That is, we've already encountered
2663 # the last CVSRevision that is a source for that symbol, the final
2664 # fill for this symbol has been done, and we never need to fill it
2666 self
.done_symbols
= [ ]
2668 # This variable holds the most recently created primary svn_commit
2669 # object. CVSRevisionAggregator maintains this variable merely
2670 # for its date, so that it can set dates for the SVNCommits
2671 # created in self._attempt_to_commit_symbols().
2672 self
.latest_primary_svn_commit
= None
2674 Ctx()._symbolings
_logger
= SymbolingsLogger()
2675 Ctx()._persistence
_manager
= PersistenceManager(DB_OPEN_NEW
)
2676 Ctx()._default
_branches
_db
= SDatabase(temp(DEFAULT_BRANCHES_DB
),
def _extract_ready_commits(self, timestamp):
  """Extract and return any active commits that expire by TIMESTAMP."""
  ready_queue = [ ]
  # .items() gives a snapshot (a list under Python 2), so deleting
  # from self.cvs_commits inside the loop is safe.
  for digest_key, cvs_commit in self.cvs_commits.items():
    if cvs_commit.t_max + COMMIT_THRESHOLD < timestamp:
      ready_queue.append(cvs_commit)
      del self.cvs_commits[digest_key]
  return ready_queue
def process_revision(self, c_rev):
  """Fold C_REV into the set of accumulating commits, flushing any
  commits that C_REV's timestamp shows to be complete."""
  # Each time we read a new line, scan the accumulating commits to
  # see if any are ready for processing.
  ready_queue = self._extract_ready_commits(c_rev.timestamp)

  for digest_key, cvs_commit in self.cvs_commits.items():
    # If the inbound commit is on the same file as a pending commit,
    # close the pending commit to further changes.  Don't flush it
    # though, as there may be other pending commits dated before this
    # one.
    # ### ISSUE: the has_file() check below is not optimal.
    # It does fix the dataloss bug where revisions would get lost
    # if checked in too quickly, but it can also break apart the
    # commits.  The correct fix would require tracking the dependencies
    # between change sets and committing them in proper order.
    if cvs_commit.has_file(c_rev.fname):
      unused_id = digest_key + '-'
      # Find a string that is not already a key in
      # the self.cvs_commits dict
      while unused_id in self.cvs_commits:
        unused_id = unused_id + '-'
      self.cvs_commits[unused_id] = cvs_commit
      del self.cvs_commits[digest_key]

  # Add this item into the set of still-available commits.
  if c_rev.digest in self.cvs_commits:
    cvs_commit = self.cvs_commits[c_rev.digest]
  else:
    author, log = self.metadata_db[c_rev.digest]
    cvs_commit = CVSCommit(c_rev.digest, author, log)
    self.cvs_commits[c_rev.digest] = cvs_commit
  cvs_commit.add_revision(c_rev)

  # Any elements in the ready_queue at this point need to be
  # processed, because this latest rev couldn't possibly be part
  # of any of them.  Sort them into time-order, then process 'em.
  ready_queue.sort()
  while ready_queue:
    cvs_commit = ready_queue.pop(0)
    self.latest_primary_svn_commit = \
        cvs_commit.process_revisions(self.done_symbols)
    self._add_pending_symbols(c_rev)
    self._attempt_to_commit_symbols(ready_queue)

  # Make sure we _add_pending_symbols() for this c_rev and
  # _attempt_to_commit_symbols(), even if no commits are ready.
  self._add_pending_symbols(c_rev)
  self._attempt_to_commit_symbols(ready_queue)
2740 """Commit anything left in self.cvs_commits. Then inform the
2741 SymbolingsLogger that all commits are done."""
2744 for k
, v
in self
.cvs_commits
.items():
2745 ready_queue
.append((v
, k
))
2749 (cvs_commit
, key
) = ready_queue
.pop(0)
2750 self
.latest_primary_svn_commit
= \
2751 cvs_commit
.process_revisions(self
.done_symbols
)
2752 del self
.cvs_commits
[key
]
2753 self
._attempt
_to
_commit
_symbols
([])
2755 if not Ctx().trunk_only
:
2756 Ctx()._symbolings
_logger
.close()
def _add_pending_symbols(self, c_rev):
  """Add to self.pending_symbols any symbols from C_REV for which
  C_REV is the last CVSRevision.

  If we're not doing a trunk-only conversion, get the symbolic names
  that this c_rev is the last *source* CVSRevision for and add them
  to those left over from previous passes through the aggregator."""
  if not Ctx().trunk_only:
    for sym in self.last_revs_db.get(c_rev.unique_key(), []):
      self.pending_symbols[sym] = None
def _attempt_to_commit_symbols(self, queued_commits):
  """Generate one SVNCommit for each symbol in self.pending_symbols
  that doesn't have an opening CVSRevision in either QUEUED_COMMITS
  or self.cvs_commits.values()."""
  # Make a list of all symbols from self.pending_symbols that do not
  # have *source* CVSRevisions in the pending commit queue
  # (self.cvs_commits) or in queued_commits:
  closeable_symbols = []
  for sym in self.pending_symbols:
    for cvs_commit in self.cvs_commits.values() + queued_commits:
      if cvs_commit.opens_symbolic_name(sym):
        break
    else:
      # No pending commit opens this symbol, so it can be closed.
      closeable_symbols.append(sym)

  # Sort the closeable symbols so that we will always process the
  # symbols in the same order, regardless of the order in which the
  # dict hashing algorithm hands them back to us.  We do this so
  # that our tests will get the same results on all platforms.
  closeable_symbols.sort()
  for sym in closeable_symbols:
    svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
    svn_commit.set_symbolic_name(sym)
    svn_commit.set_date(self.latest_primary_svn_commit.get_date())
    # NOTE(review): this flush call was reconstructed from a gap in
    # the extracted source -- confirm against upstream cvs2svn.
    svn_commit.flush()
    self.done_symbols.append(sym)
    del self.pending_symbols[sym]
2800 class SymbolingsReader
:
2801 """Provides an interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file
2802 and the SYMBOL_OFFSETS_DB. Does the heavy lifting of finding and
2803 returning the correct opening and closing Subversion revision
2804 numbers for a given symbolic name."""
2806 """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and
2807 reads the offsets database into memory."""
2808 self
.symbolings
= open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED
), 'r')
2809 # The offsets_db is really small, and we need to read and write
2810 # from it a fair bit, so suck it into memory
2811 offsets_db
= Database(temp(SYMBOL_OFFSETS_DB
), DB_OPEN_READ
)
2813 for key
in offsets_db
:
2814 #print " ZOO:", key, offsets_db[key]
2815 self
.offsets
[key
] = offsets_db
[key
]
def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
  """Given SYMBOLIC_NAME and SVN_REVNUM, return a new
  SymbolicNameFillingGuide object.

  Note that if we encounter an opening rev in this fill, but the
  corresponding closing rev takes place later than SVN_REVNUM, the
  closing will not be passed to SymbolicNameFillingGuide in this
  fill (and will be discarded when encountered in a later fill).
  This is perfectly fine, because we can still do a valid fill
  without the closing--we always try to fill what we can as soon as
  we can."""
  openings_closings_map = OpeningsClosingsMap(symbolic_name)

  # It's possible to have a branch start with a file that was added
  # on the branch, in which case there is no offset recorded for the
  # symbol and there is nothing to read.
  if symbolic_name in self.offsets:
    # set our read offset for self.symbolings to the offset for
    # this symbolic name
    self.symbolings.seek(self.offsets[symbolic_name])

    while True:
      fpos = self.symbolings.tell()
      line = self.symbolings.readline().rstrip()
      if not line:
        break
      name, revnum, type, branch_name, cvs_path = line.split(" ", 4)
      if branch_name == '*':
        svn_path = Ctx().project.make_trunk_path(cvs_path)
      else:
        svn_path = Ctx().project.make_branch_path(branch_name, cvs_path)
      revnum = int(revnum)
      if revnum > svn_revnum or name != symbolic_name:
        break
      openings_closings_map.register(svn_path, revnum, type)

    # get current offset of the read marker and set it to the offset
    # for the beginning of the line we just read if we used anything
    # we read.
    if not openings_closings_map.is_empty():
      self.offsets[symbolic_name] = fpos

  return SymbolicNameFillingGuide(openings_closings_map)
class SvnRevisionRange:
  """The range of subversion revision numbers from which a path can be
  copied.  self.opening_revnum is the number of the earliest such
  revision, and self.closing_revnum is one higher than the number of
  the last such revision.  If self.closing_revnum is None, then no
  closings were registered."""

  def __init__(self, opening_revnum):
    # Earliest revision the path can be copied from.
    self.opening_revnum = opening_revnum
    # No closing has been registered yet.
    self.closing_revnum = None

  def add_closing(self, closing_revnum):
    """Record CLOSING_REVNUM, keeping only the first one seen.

    When we have a non-trunk default branch, we may have multiple
    closings--only register the first closing we encounter."""
    if self.closing_revnum is None:
      self.closing_revnum = closing_revnum

  def __str__(self):
    """Return '[opening:closing]', leaving closing blank if unset."""
    if self.closing_revnum is None:
      return '[%d:]' % (self.opening_revnum,)
    return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)
class OpeningsClosingsMap:
  """A dictionary of openings and closings for a symbolic name in the
  current SVNCommit.

  The user should call self.register() for the openings and closings,
  then self.get_node_tree() to retrieve the information as a
  SymbolicNameFillingGuide."""

  def __init__(self, symbolic_name):
    """Initialize OpeningsClosingsMap and prepare it for receiving
    openings and closings."""
    self.name = symbolic_name

    # A dictionary of SVN_PATHS to SvnRevisionRange objects.
    self.things = { }

  def register(self, svn_path, svn_revnum, type):
    """Register an opening or closing revision for this symbolic name.
    SVN_PATH is the source path that needs to be copied into
    self.symbolic_name, and SVN_REVNUM is either the first svn
    revision number that we can copy from (our opening), or the last
    (not inclusive) svn revision number that we can copy from (our
    closing).  TYPE indicates whether this path is an opening or a
    closing.

    The opening for a given SVN_PATH must be passed before the closing
    for it to have any effect... any closing encountered before a
    corresponding opening will be discarded.

    It is not necessary to pass a corresponding closing for every
    opening."""
    # Always log an OPENING
    if type == OPENING:
      self.things[svn_path] = SvnRevisionRange(svn_revnum)
    # Only log a closing if we've already registered the opening for
    # that path.
    elif type == CLOSING and svn_path in self.things:
      self.things[svn_path].add_closing(svn_revnum)

  def is_empty(self):
    """Return true if we haven't accumulated any openings or closings,
    false otherwise."""
    return not len(self.things)

  def get_things(self):
    """Return a list of (svn_path, SvnRevisionRange) tuples for all
    svn_paths with registered openings or closings."""
    return self.things.items()
2939 class SymbolicNameFillingGuide
:
2940 """A node tree representing the source paths to be copied to fill
2941 self.symbolic_name in the current SVNCommit.
2943 self._node_tree is the root of the directory tree, in the form {
2944 path_component : subnode }. Leaf nodes are instances of
2945 SvnRevisionRange. Intermediate (directory) nodes are dictionaries
2946 mapping relative names to subnodes.
2948 By walking self._node_tree and calling self.get_best_revnum() on
2949 each node, the caller can determine what subversion revision number
2950 to copy the path corresponding to that node from. self._node_tree
2951 should be treated as read-only.
2953 The caller can then descend to sub-nodes to see if their "best
2954 revnum" differs from their parents' and if it does, take appropriate
2955 actions to "patch up" the subtrees."""
def __init__(self, openings_closings_map):
  """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
  store into it the openings and closings from
  OPENINGS_CLOSINGS_MAP."""
  self.name = openings_closings_map.name

  # The dictionary that holds our node tree as a map { node_key :
  # node_contents }.
  self._node_tree = { }

  # File each (path, revision-range) pair as a leaf under its parent
  # directory node.
  for svn_path, svn_revision_range in openings_closings_map.get_things():
    (head, tail) = _path_split(svn_path)
    self._get_node_for_path(head)[tail] = svn_revision_range

  #self.print_node_tree(self._node_tree)
2974 def _get_node_for_path(self
, svn_path
):
2975 """Return the node key for svn_path, creating new nodes as needed."""
2976 # Walk down the path, one node at a time.
2977 node
= self
._node
_tree
2978 for component
in svn_path
.split('/'):
2979 if node
.has_key(component
):
2980 node
= node
[component
]
2984 old_node
[component
] = node
def get_best_revnum(self, node, preferred_revnum):
  """Determine the best subversion revision number to use when
  copying the source tree beginning at NODE.  Returns a
  subversion revision number.

  PREFERRED_REVNUM is passed to best_rev and used to break ties in
  favor of a revision the caller would prefer to copy from."""

  def score_revisions(svn_revision_ranges):
    """Return a list of revisions and scores based on
    SVN_REVISION_RANGES.  The returned list looks like:

       [(REV1 SCORE1), (REV2 SCORE2), ...]

    where the tuples are sorted by revision number.
    SVN_REVISION_RANGES is a list of SvnRevisionRange objects.

    For each svn revision that appears as either an opening_revnum
    or closing_revnum for one of the svn_revision_ranges, output a
    tuple indicating how many of the SvnRevisionRanges include that
    svn_revision in its range.  A score thus indicates that copying
    the corresponding revision (or any following revision up to the
    next revision in the list) of the object in question would yield
    that many correct paths at or underneath the object.  There may
    be other paths underneath it which are not correct and would
    need to be deleted or recopied; those can only be detected by
    descending and examining their scores.

    If OPENINGS is empty, return the empty list."""
    openings = [ x.opening_revnum
                 for x in svn_revision_ranges ]
    closings = [ x.closing_revnum
                 for x in svn_revision_ranges
                 if x.closing_revnum is not None ]

    # First look for easy out.
    if not openings:
      return []

    # Create a list with both openings (which increment the total)
    # and closings (which decrement the total):
    things = [(rev, 1) for rev in openings] + [(rev, -1) for rev in closings]
    # Sort by revision number:
    things.sort()
    # Initialize output list with zeroth element of things.  This
    # element must exist, because it was already verified that
    # openings is not empty.
    scores = [ things[0] ]
    total = scores[-1][1]
    for (rev, change) in things[1:]:
      total += change
      if rev == scores[-1][0]:
        # Same revision as last entry; modify last entry:
        scores[-1] = (rev, total)
      else:
        # Previously-unseen revision; create new entry:
        scores.append((rev, total))
    return scores

  def best_rev(scores, preferred_rev):
    """Return the revision with the highest score from SCORES, a list
    returned by score_revisions().  When the maximum score is shared
    by multiple revisions, the oldest revision is selected, unless
    PREFERRED_REV is one of the possibilities, in which case, it is
    selected."""
    max_score = 0
    preferred_rev_score = -1
    rev = SVN_INVALID_REVNUM
    if preferred_rev is None:
      # Comparison order of different types is arbitrary.  Do not
      # expect None to compare less than int values below.
      preferred_rev = SVN_INVALID_REVNUM
    for revnum, count in scores:
      if count > max_score:
        max_score = count
        rev = revnum
      if revnum <= preferred_rev:
        preferred_rev_score = count
    if preferred_rev_score == max_score:
      rev = preferred_rev
    return rev, max_score

  # Aggregate openings and closings from the rev tree
  svn_revision_ranges = self._list_revnums(node)

  # Score the revisions
  scores = score_revisions(svn_revision_ranges)

  revnum, max_score = best_rev(scores, preferred_revnum)

  if revnum == SVN_INVALID_REVNUM:
    # NOTE(review): 'name' is not defined in this scope in the visible
    # source -- presumably self.name was intended; confirm upstream.
    raise FatalError("failed to find a revision "
                     + "to copy from when copying %s" % name)
  return revnum, max_score
def _list_revnums(self, node):
  """Return a list of all the SvnRevisionRanges (including
  duplicates) for all leaf nodes at and under NODE."""
  if isinstance(node, SvnRevisionRange):
    # It is a leaf node.
    return [ node ]

  # It is an intermediate node: gather ranges from every subtree.
  revnums = [ ]
  for key, subnode in node.items():
    revnums.extend(self._list_revnums(subnode))
  return revnums
def get_sources(self):
  """Return the list of sources for this symbolic name.

  The Project instance defines what are legitimate sources.  Raise
  an exception if a change occurred outside of the source
  directories."""
  return self._get_sub_sources('', self._node_tree)
def _get_sub_sources(self, start_svn_path, start_node):
  """Return the list of sources for this symbolic name, starting the
  search at path START_SVN_PATH, which is node START_NODE.  This is
  a helper method, called by get_sources() (see)."""
  project = Ctx().project
  if isinstance(start_node, SvnRevisionRange):
    # This implies that a change was found outside of the
    # legitimate sources.  This should never happen.
    # NOTE(review): the bare raise was reconstructed from a gap in the
    # extracted source -- confirm the exact exception upstream.
    raise
  elif project.is_source(start_svn_path):
    # This is a legitimate source.  Add it to list.
    return [ FillSource(start_svn_path, start_node) ]
  else:
    # This is a directory that is not a legitimate source.  (That's
    # OK because it hasn't changed directly.)  But directories
    # within it have been changed, so we need to search recursively
    # to find their enclosing sources.
    sources = [ ]
    for entry, node in start_node.items():
      svn_path = _path_join(start_svn_path, entry)
      sources.extend(self._get_sub_sources(svn_path, node))
    return sources
3131 def print_node_tree(self
, node
, name
='/', indent_depth
=0):
3132 """For debugging purposes. Prints all nodes in TREE that are
3133 rooted at NODE. INDENT_DEPTH is used to indent the output of
3135 if not indent_depth
:
3136 print "TREE", "=" * 75
3137 if isinstance(node
, SvnRevisionRange
):
3138 print "TREE:", " " * (indent_depth
* 2), name
, node
3140 print "TREE:", " " * (indent_depth
* 2), name
3141 for key
, value
in node
.items():
3142 self
.print_node_tree(value
, key
, (indent_depth
+ 1))
3146 """Representation of a fill source used by the symbol filler in
3147 SVNRepositoryMirror."""
3148 def __init__(self
, prefix
, node
):
3149 """Create an unscored fill source with a prefix and a key."""
3150 self
.prefix
= prefix
def set_score(self, score, revnum):
  """Set the SCORE and REVNUM."""
  self.score = score
  self.revnum = revnum
def __cmp__(self, other):
  """Comparison operator used to sort FillSources in descending
  score order."""
  if self.score is None or other.score is None:
    # Raising via call syntax is equivalent to the old
    # 'raise TypeError, msg' form and also valid on Python 2.
    raise TypeError('Tried to compare unscored FillSource')
  # Reversed operands yield a descending sort by score.
  return cmp(other.score, self.score)
3168 class SVNRepositoryMirror
:
3169 """Mirror a Subversion Repository as it is constructed, one
3170 SVNCommit at a time. The mirror is skeletal; it does not contain
3171 file contents. The creation of a dumpfile or Subversion repository
3172 is handled by delegates. See self.add_delegate method for how to
3175 The structure of the repository is kept in two databases and one
3176 hash. The revs_db database maps revisions to root node keys, and
3177 the nodes_db database maps node keys to nodes. A node is a hash
3178 from directory names to keys. Both the revs_db and the nodes_db are
3179 stored on disk and each access is expensive.
3181 The nodes_db database only has the keys for old revisions. The
3182 revision that is being contructed is kept in memory in the new_nodes
3183 hash which is cheap to access.
3185 You must invoke _start_commit between SVNCommits.
3187 *** WARNING *** All path arguments to methods in this class CANNOT
3188 have leading or trailing slashes.
class SVNRepositoryMirrorPathExistsError(Exception):
  """Exception raised if an attempt is made to add a path to the
  repository mirror and that path already exists in the youngest
  revision of the repository."""
  pass


class SVNRepositoryMirrorUnexpectedOperationError(Exception):
  """Exception raised if a CVSRevision is found to have an unexpected
  operation (OP) value."""
  pass


class SVNRepositoryMirrorInvalidFillOperationError(Exception):
  """Exception raised if an empty SymbolicNameFillingGuide is returned
  during a fill where the branch in question already exists."""
  pass
3208 """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
3209 self
.delegates
= [ ]
3211 # This corresponds to the 'revisions' table in a Subversion fs.
3212 self
.revs_db
= SDatabase(temp(SVN_MIRROR_REVISIONS_DB
), DB_OPEN_NEW
)
3213 Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB
), pass8
)
3215 # This corresponds to the 'nodes' table in a Subversion fs. (We
3216 # don't need a 'representations' or 'strings' table because we
3217 # only track metadata, not file contents.)
3218 self
.nodes_db
= Database(temp(SVN_MIRROR_NODES_DB
), DB_OPEN_NEW
)
3219 Cleanup().register(temp(SVN_MIRROR_NODES_DB
), pass8
)
3221 # Start at revision 0 without a root node. It will be created
3222 # by _open_writable_root_node.
3224 self
.new_root_key
= None
3225 self
.new_nodes
= { }
3227 if not Ctx().trunk_only
:
3228 ###PERF IMPT: Suck this into memory.
3229 self
.tags_db
= TagsDatabase(DB_OPEN_READ
)
3230 self
.symbolings_reader
= SymbolingsReader()
def _initialize_repository(self, date):
  """Initialize the repository by creating the directories for
  trunk, tags, and branches.  This method should only be called
  after all delegates are added to the repository mirror."""
  # Make a 'fake' SVNCommit so we can take advantage of the revprops
  # machinery below.
  svn_commit = SVNCommit("Initialization", 1)
  svn_commit.set_date(date)
  svn_commit.set_log_msg("New repository initialized by cvs2svn.")

  self._start_commit(svn_commit)
  self._mkdir(Ctx().project.trunk_path)
  if not Ctx().trunk_only:
    self._mkdir(Ctx().project.branches_path)
    self._mkdir(Ctx().project.tags_path)
3248 def _start_commit(self
, svn_commit
):
3249 """Start a new commit."""
3250 if self
.youngest
> 0:
3253 self
.youngest
= svn_commit
.revnum
3254 self
.new_root_key
= None
3255 self
.new_nodes
= { }
3257 self
._invoke
_delegates
('start_commit', svn_commit
)
3259 def _end_commit(self
):
3260 """Called at the end of each commit. This method copies the newly
3261 created nodes to the on-disk nodes db."""
3262 if self
.new_root_key
is None:
3263 # No changes were made in this revision, so we make the root node
3264 # of the new revision be the same as the last one.
3265 self
.revs_db
[str(self
.youngest
)] = self
.revs_db
[str(self
.youngest
- 1)]
3267 self
.revs_db
[str(self
.youngest
)] = self
.new_root_key
3268 # Copy the new nodes to the nodes_db
3269 for key
, value
in self
.new_nodes
.items():
3270 self
.nodes_db
[key
] = value
3272 def _get_node(self
, key
):
3273 """Returns the node contents for KEY which may refer to either
3274 self.nodes_db or self.new_nodes."""
3275 if self
.new_nodes
.has_key(key
):
3276 return self
.new_nodes
[key
]
3278 return self
.nodes_db
[key
]
3280 def _open_readonly_node(self
, path
, revnum
):
3281 """Open a readonly node for PATH at revision REVNUM. Returns the
3282 node key and node contents if the path exists, else (None, None)."""
3284 if revnum
== self
.youngest
:
3285 if self
.new_root_key
is None:
3286 node_key
= self
.revs_db
[str(self
.youngest
- 1)]
3288 node_key
= self
.new_root_key
3290 node_key
= self
.revs_db
[str(revnum
)]
3292 for component
in path
.split('/'):
3293 node_contents
= self
._get
_node
(node_key
)
3294 node_key
= node_contents
.get(component
, None)
3295 if node_key
is None:
def _open_writable_root_node(self):
  """Open a writable root node.  The current root node is returned
  immediately if it is already writable.  If not, create a new one by
  copying the contents of the root node of the previous version."""
  if self.new_root_key is not None:
    return self.new_root_key, self.new_nodes[self.new_root_key]

  if self.youngest < 2:
    # Nothing exists yet; start from an empty root.
    new_contents = { }
  else:
    new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
  self.new_root_key = gen_key()
  self.new_nodes = { self.new_root_key : new_contents }

  return self.new_root_key, new_contents
def _open_writable_node(self, svn_path, create):
  """Open a writable node for the path SVN_PATH, creating SVN_PATH
  and any missing directories if CREATE is True."""
  parent_key, parent_contents = self._open_writable_root_node()

  # Walk up the path, one node at a time.
  path_so_far = None
  components = svn_path.split('/')
  for i in range(len(components)):
    component = components[i]
    path_so_far = _path_join(path_so_far, component)
    this_key = parent_contents.get(component, None)
    if this_key is not None:
      # The component exists.
      this_contents = self.new_nodes.get(this_key, None)
      if this_contents is None:
        # Suck the node from the nodes_db, but update the key
        this_contents = self.nodes_db[this_key]
        this_key = gen_key()
        self.new_nodes[this_key] = this_contents
        parent_contents[component] = this_key
    elif create:
      # The component does not exists, so we create it.
      this_contents = { }
      this_key = gen_key()
      self.new_nodes[this_key] = this_contents
      parent_contents[component] = this_key
      if i < len(components) - 1:
        self._invoke_delegates('mkdir', path_so_far)
    else:
      # The component does not exists and we are not instructed to
      # create it, so we give up.
      return None, None

    parent_key = this_key
    parent_contents = this_contents

  return this_key, this_contents
3355 def _path_exists(self
, path
):
3356 """If PATH exists in self.youngest of the svn repository mirror,
3357 return true, else return None.
3359 PATH must not start with '/'."""
3360 return self
._open
_readonly
_node
(path
, self
.youngest
) is not None
3362 def _fast_delete_path(self
, parent_path
, parent_contents
, component
):
3363 """Delete COMPONENT from the parent direcory PARENT_PATH with the
3364 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
3365 in PARENT_CONTENTS."""
3366 if parent_contents
.has_key(component
):
3367 del parent_contents
[component
]
3368 self
._invoke
_delegates
('delete_path',
3369 _path_join(parent_path
, component
))
def _delete_path(self, svn_path, should_prune=False):
  """Delete SVN_PATH from the tree.  If SHOULD_PRUNE is true, then delete
  all ancestor directories that are made empty when SVN_PATH is deleted.
  In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.

  NOTE: This function ignores requests to delete the root directory
  or any directory for which Ctx().project.is_unremovable() returns
  True, either directly or by pruning."""
  if svn_path == '' or Ctx().project.is_unremovable(svn_path):
    return

  (parent_path, entry,) = _path_split(svn_path)
  if parent_path:
    parent_key, parent_contents = \
        self._open_writable_node(parent_path, False)
  else:
    parent_key, parent_contents = self._open_writable_root_node()

  if parent_key is not None:
    self._fast_delete_path(parent_path, parent_contents, entry)
    # The following recursion makes pruning an O(n^2) operation in the
    # worst case (where n is the depth of SVN_PATH), but the worst case
    # is probably rare, and the constant cost is pretty low.  Another
    # drawback is that we issue a delete for each path and not just
    # a single delete for the topmost directory pruned.
    if should_prune and len(parent_contents) == 0:
      self._delete_path(parent_path, True)
3400 def _mkdir(self
, path
):
3401 """Create PATH in the repository mirror at the youngest revision."""
3402 self
._open
_writable
_node
(path
, True)
3403 self
._invoke
_delegates
('mkdir', path
)
def _change_path(self, cvs_rev):
  """Register a change in self.youngest for the CVS_REV's svn_path
  in the repository mirror."""
  # We do not have to update the nodes because our mirror is only
  # concerned with the presence or absence of paths, and a file
  # content change does not cause any path changes.
  self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))
def _add_path(self, cvs_rev):
  """Add the CVS_REV's svn_path to the repository mirror."""
  # Create the node (and any missing parents), then tell delegates.
  self._open_writable_node(cvs_rev.svn_path, True)
  self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))
def _copy_path(self, src_path, dest_path, src_revnum):
  """Copy SRC_PATH at subversion revision number SRC_REVNUM to
  DEST_PATH.  In the youngest revision of the repository, DEST_PATH's
  parent *must* exist, but DEST_PATH *cannot* exist.

  Return the node key and the contents of the new node at DEST_PATH
  as a dictionary."""
  # get the contents of the node of our src_path
  src_key = self._open_readonly_node(src_path, src_revnum)
  src_contents = self._get_node(src_key)

  # Get the parent path and the base path of the dest_path
  (dest_parent, dest_basename,) = _path_split(dest_path)
  dest_parent_key, dest_parent_contents = \
      self._open_writable_node(dest_parent, False)

  if dest_basename in dest_parent_contents:
    msg = "Attempt to add path '%s' to repository mirror " % dest_path
    msg = msg + "when it already exists in the mirror."
    raise self.SVNRepositoryMirrorPathExistsError(msg)

  dest_parent_contents[dest_basename] = src_key
  self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

  # Yes sir, src_key and src_contents are also the contents of the
  # destination.  This is a cheap copy, remember!  :-)
  return src_key, src_contents
def _fill_symbolic_name(self, svn_commit):
  """Performs all copies necessary to create as much of the tag
  or branch SVN_COMMIT.symbolic_name as possible given the current
  revision of the repository mirror.

  The symbolic name is guaranteed to exist in the Subversion
  repository by the end of this call, even if there are no paths
  under it."""
  symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
      svn_commit.symbolic_name, self.youngest)
  # Get the list of sources for the symbolic name.
  sources = symbol_fill.get_sources()

  if sources:
    # Tags and branches land under different destination prefixes.
    if self.tags_db.has_key(svn_commit.symbolic_name):
      dest_prefix = Ctx().project.get_tag_path(svn_commit.symbolic_name)
    else:
      dest_prefix = Ctx().project.get_branch_path(svn_commit.symbolic_name)

    dest_key = self._open_writable_node(dest_prefix, False)[0]
    self._fill(symbol_fill, dest_prefix, dest_key, sources)
  else:
    # We can only get here for a branch whose first commit is an add
    # (as opposed to a copy).
    dest_path = Ctx().project.get_branch_path(symbol_fill.name)
    if not self._path_exists(dest_path):
      # If our symbol_fill was empty, that means that our first
      # commit on the branch was to a file added on the branch, and
      # that this is our first fill of that branch.
      #
      # This case is covered by test 16.
      #
      # ...we create the branch by copying trunk from the our
      # current revision number minus 1
      source_path = Ctx().project.trunk_path
      entries = self._copy_path(source_path, dest_path,
                                svn_commit.revnum - 1)[1]
      # Now since we've just copied trunk to a branch that's
      # *supposed* to be empty, we delete any entries in the
      # copied directory.
      for entry in entries:
        del_path = dest_path + '/' + entry
        # Delete but don't prune.
        self._delete_path(del_path)
    else:
      msg = "Error filling branch '" \
            + _clean_symbolic_name(symbol_fill.name) + "'.\n"
      msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
      msg = msg + "attempted to create a branch that already exists."
      raise self.SVNRepositoryMirrorInvalidFillOperationError(msg)
3497 def _fill(self
, symbol_fill
, dest_prefix
, dest_key
, sources
,
3498 path
= None, parent_source_prefix
= None,
3499 preferred_revnum
= None, prune_ok
= None):
3500 """Fill the tag or branch at DEST_PREFIX + PATH with items from
3501 SOURCES, and recurse into the child items.
3503 DEST_PREFIX is the prefix of the destination directory, e.g.
3504 '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
3505 FillSource classes that are candidates to be copied to the
3506 destination. DEST_KEY is the key in self.nodes_db to the
3507 destination, or None if the destination does not yet exist.
3509 PATH is the path relative to DEST_PREFIX. If PATH is None, we
3510 are at the top level, e.g. '/tags/my_tag'.
3512 PARENT_SOURCE_PREFIX is the source prefix that was used to copy
3513 the parent directory, and PREFERRED_REVNUM is an int which is the
3514 source revision number that the caller (who may have copied KEY's
3515 parent) used to perform its copy. If PREFERRED_REVNUM is None,
3516 then no revision is preferable to any other (which probably means
3517 that no copies have happened yet).
3519 PRUNE_OK means that a copy has been made in this recursion, and
3520 it's safe to prune directories that are not in
3521 SYMBOL_FILL._node_tree, provided that said directory has a source
3522 prefix of one of the PARENT_SOURCE_PREFIX.
3524 PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
3525 should only be passed in by recursive calls."""
3526 # Calculate scores and revnums for all sources
3527 for source
in sources
:
3528 src_revnum
, score
= symbol_fill
.get_best_revnum(source
.node
,
3530 source
.set_score(score
, src_revnum
)
3532 # Sort the sources in descending score order so that we will make
3533 # a eventual copy from the source with the highest score.
3535 copy_source
= sources
[0]
3537 src_path
= _path_join(copy_source
.prefix
, path
)
3538 dest_path
= _path_join(dest_prefix
, path
)
3540 # Figure out if we shall copy to this destination and delete any
3541 # destination path that is in the way.
3543 if dest_key
is None:
3545 elif prune_ok
and (parent_source_prefix
!= copy_source
.prefix
or
3546 copy_source
.revnum
!= preferred_revnum
):
3547 # We are about to replace the destination, so we need to remove
3548 # it before we perform the copy.
3549 self
._delete
_path
(dest_path
)
3553 dest_key
, dest_entries
= self
._copy
_path
(src_path
, dest_path
,
3557 dest_entries
= self
._get
_node
(dest_key
)
3559 # Create the SRC_ENTRIES hash from SOURCES. The keys are path
3560 # elements and the values are lists of FillSource classes where
3561 # this path element exists.
3563 for source
in sources
:
3564 if isinstance(source
.node
, SvnRevisionRange
):
3566 for entry
, node
in source
.node
.items():
3567 src_entries
.setdefault(entry
, []).append(
3568 FillSource(source
.prefix
, node
))
3571 # Delete the entries in DEST_ENTRIES that are not in src_entries.
3573 for entry
in dest_entries
:
3574 if not src_entries
.has_key(entry
):
3575 delete_list
.append(entry
)
3577 if not self
.new_nodes
.has_key(dest_key
):
3578 dest_key
, dest_entries
= self
._open
_writable
_node
(dest_path
, True)
3579 # Sort the delete list to get "diffable" dumpfiles.
3581 for entry
in delete_list
:
3582 self
._fast
_delete
_path
(dest_path
, dest_entries
, entry
)
3584 # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
3585 src_keys
= src_entries
.keys()
3587 for src_key
in src_keys
:
3588 next_dest_key
= dest_entries
.get(src_key
, None)
3589 self
._fill
(symbol_fill
, dest_prefix
, next_dest_key
,
3590 src_entries
[src_key
], _path_join(path
, src_key
),
3591 copy_source
.prefix
, sources
[0].revnum
, prune_ok
)
3593 def _synchronize_default_branch(self
, svn_commit
):
3594 """Propagate any changes that happened on a non-trunk default
3595 branch to the trunk of the repository. See
3596 CVSCommit._post_commit() for details on why this is necessary."""
3597 for cvs_rev
in svn_commit
.cvs_revs
:
3598 svn_trunk_path
= Ctx().project
.make_trunk_path(cvs_rev
.cvs_path
)
3599 if cvs_rev
.op
== OP_ADD
or cvs_rev
.op
== OP_CHANGE
:
3600 if self
._path
_exists
(svn_trunk_path
):
3601 # Delete the path on trunk...
3602 self
._delete
_path
(svn_trunk_path
)
3603 # ...and copy over from branch
3604 self
._copy
_path
(cvs_rev
.svn_path
, svn_trunk_path
,
3605 svn_commit
.motivating_revnum
)
3606 elif cvs_rev
.op
== OP_DELETE
:
3608 self
._delete
_path
(svn_trunk_path
)
3610 msg
= ("Unknown CVSRevision operation '%s' in default branch sync."
3612 raise self
.SVNRepositoryMirrorUnexpectedOperationError
, msg
3614 def commit(self
, svn_commit
):
3615 """Add an SVNCommit to the SVNRepository, incrementing the
3616 Repository revision number, and changing the repository. Invoke
3617 the delegates' _start_commit() method."""
3619 if svn_commit
.revnum
== 2:
3620 self
._initialize
_repository
(svn_commit
.get_date())
3622 self
._start
_commit
(svn_commit
)
3624 if svn_commit
.symbolic_name
:
3625 Log().write(LOG_VERBOSE
, "Filling symbolic name:",
3626 _clean_symbolic_name(svn_commit
.symbolic_name
))
3627 self
._fill
_symbolic
_name
(svn_commit
)
3628 elif svn_commit
.motivating_revnum
:
3629 Log().write(LOG_VERBOSE
, "Synchronizing default_branch motivated by %d"
3630 % svn_commit
.motivating_revnum
)
3631 self
._synchronize
_default
_branch
(svn_commit
)
3632 else: # This actually commits CVSRevisions
3633 if len(svn_commit
.cvs_revs
) > 1: plural
= "s"
3635 Log().write(LOG_VERBOSE
, "Committing %d CVSRevision%s"
3636 % (len(svn_commit
.cvs_revs
), plural
))
3637 for cvs_rev
in svn_commit
.cvs_revs
:
3638 # See comment in CVSCommit._commit() for what this is all
3639 # about. Note that although asking self._path_exists() is
3640 # somewhat expensive, we only do it if the first two (cheap)
3641 # tests succeed first.
3642 if not ((cvs_rev
.deltatext_code
== DELTATEXT_EMPTY
)
3643 and (cvs_rev
.rev
== "1.1.1.1")
3644 and self
._path
_exists
(cvs_rev
.svn_path
)):
3645 if cvs_rev
.op
== OP_ADD
:
3646 self
._add
_path
(cvs_rev
)
3647 elif cvs_rev
.op
== OP_CHANGE
:
3648 # Fix for Issue #74:
3650 # Here's the scenario. You have file FOO that is imported
3651 # on a non-trunk vendor branch. So in r1.1 and r1.1.1.1,
3654 # Moving forward in time, FOO is deleted on the default
3655 # branch (r1.1.1.2). cvs2svn determines that this delete
3656 # also needs to happen on trunk, so FOO is deleted on
3659 # Along come r1.2, whose op is OP_CHANGE (because r1.1 is
3660 # not 'dead', we assume it's a change). However, since
3661 # our trunk file has been deleted, svnadmin blows up--you
3662 # can't change a file that doesn't exist!
3664 # Soooo... we just check the path, and if it doesn't
3665 # exist, we do an add... if the path does exist, it's
3666 # business as usual.
3667 if not self
._path
_exists
(cvs_rev
.svn_path
):
3668 self
._add
_path
(cvs_rev
)
3670 self
._change
_path
(cvs_rev
)
3672 if cvs_rev
.op
== OP_DELETE
:
3673 self
._delete
_path
(cvs_rev
.svn_path
, Ctx().prune
)
# NOTE(review): the enclosing "def" line (original file line 3675) was lost
# in extraction, so this method's name is unknown from here.  Judging by
# the docstring it is the callback registered with Cleanup in __init__,
# and it appears to drop the mirror's reference to its nodes database --
# confirm the method name and any additional statements against upstream.
3676 """Callback for the Cleanup.register in self.__init__."""
3678 self
.nodes_db
= None
3680 def add_delegate(self
, delegate
):
3681 """Adds DELEGATE to self.delegates.
3683 For every delegate you add, as soon as SVNRepositoryMirror
3684 performs a repository action method, SVNRepositoryMirror will call
3685 the delegate's corresponding repository action method. Multiple
3686 delegates will be called in the order that they are added. See
3687 SVNRepositoryMirrorDelegate for more information."""
3688 self
.delegates
.append(delegate
)
3690 def _invoke_delegates(self
, method
, *args
):
3691 """Iterate through each of our delegates, in the order that they
3692 were added, and call the delegate's method named METHOD with the
3693 arguments in ARGS."""
3694 for delegate
in self
.delegates
:
3695 getattr(delegate
, method
)(*args
)
3698 """Calls the delegate finish method."""
3700 self
._invoke
_delegates
('finish')
class SVNCommitItem:
  """A wrapper class for CVSRevision objects upon which
  Subversion-related data (such as properties) may be hung."""

  def __init__(self, c_rev, svn_props_changed):
    """Initialize instance and record the properties for this file.
    SVN_PROPS_CHANGED indicates whether the svn: properties are known
    to have changed since the last revision.

    The properties are set by the SVNPropertySetters in
    Ctx().svn_property_setters, then we read a couple of the
    properties back out for our own purposes."""
    # The wrapped CVSRevision.  (NOTE(review): this assignment was lost
    # in extraction; it is grounded by the s_item.c_rev reads elsewhere
    # in this file.)
    self.c_rev = c_rev

    # Did the svn properties change for this file (i.e., do they have
    # to be written to the dumpfile?)
    self.svn_props_changed = svn_props_changed

    # The properties for this item as a map { key : value }.  If VALUE
    # is None, no property should be set.
    self.svn_props = { }

    for svn_property_setter in Ctx().svn_property_setters:
      svn_property_setter.set_properties(self)

    # Remember if we need to filter the EOLs.  We could actually use
    # self.svn_props now, since it is initialized for each revision.
    self.needs_eol_filter = \
      self.svn_props.get('svn:eol-style', None) is not None

    self.has_keywords = self.svn_props.get('svn:keywords', None) is not None
class SVNPropertySetter:
  """Abstract class for objects that can set properties on a SVNCommitItem."""

  def set_properties(self, s_item):
    """Set any properties that can be determined for S_ITEM."""
    # Subclasses must override this method.
    raise NotImplementedError
class SVNRepositoryMirrorDelegate:
  """Abstract superclass for any delegate to SVNRepositoryMirror.
  Subclasses must implement all of the methods below.

  For each method, a subclass implements, in its own way, the
  Subversion operation implied by the method's name.  For example, for
  the add_path method, the DumpfileDelegate would write out a
  "Node-add:" command to a Subversion dumpfile, the StdoutDelegate
  would merely print that the path is being added to the repository,
  and the RepositoryDelegate would actually cause the path to be added
  to the Subversion repository that it is creating."""

  def start_commit(self, svn_commit):
    """Perform any actions needed to start SVNCommit SVN_COMMIT;
    see subclass implementation for details."""
    raise NotImplementedError

  def mkdir(self, path):
    """PATH is a string; see subclass implementation for details."""
    raise NotImplementedError

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see subclass implementation for
    details."""
    raise NotImplementedError

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see subclass implementation for
    details."""
    raise NotImplementedError

  def delete_path(self, path):
    """PATH is a string; see subclass implementation for
    details."""
    raise NotImplementedError

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are both strings, and SRC_REVNUM is a
    subversion revision number (int); see subclass implementation for
    details."""
    raise NotImplementedError

  def finish(self):
    """Perform any cleanup necessary after all revisions have been
    committed."""
    raise NotImplementedError
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
  """Create a Subversion dumpfile."""

  def __init__(self, dumpfile_path=None):
    """Return a new DumpfileDelegate instance, attached to a dumpfile
    DUMPFILE_PATH (Ctx().dumpfile, if None), using Ctx().encoding."""
    if dumpfile_path:
      self.dumpfile_path = dumpfile_path
    else:
      self.dumpfile_path = Ctx().dumpfile

    # Open for binary write and emit the standard dumpfile header.
    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)
3809 def _write_dumpfile_header(self
, dumpfile
):
3810 # Initialize the dumpfile with the standard headers.
3812 # Since the CVS repository doesn't have a UUID, and the Subversion
3813 # repository will be created with one anyway, we don't specify a
3814 # UUID in the dumpflie
3815 dumpfile
.write('SVN-fs-dump-format-version: 2\n\n')
3817 def _utf8_path(self
, path
):
3818 """Return a copy of PATH encoded in UTF-8."""
3819 pieces
= string
.split(path
, '/')
3820 # Convert each path component separately (as they may each use
3821 # different encodings).
3822 for i
in range(len(pieces
)):
3824 # Log messages can be converted with the 'replace' strategy,
3825 # but we can't afford any lossiness here.
3826 pieces
[i
] = to_utf8(pieces
[i
], 'strict')
3827 except UnicodeError:
3829 "Unable to convert a path '%s' to internal encoding.\n"
3830 "Consider rerunning with (for example) '--encoding=latin1'."
3832 return string
.join(pieces
, '/')
3834 def _string_for_prop(self
, name
, value
):
3835 """Return a property in the form needed for the dumpfile."""
3837 return 'K %d\n%s\nV %d\n%s\n' % (len(name
), name
, len(value
), value
)
3839 def start_commit(self
, svn_commit
):
3840 """Emit the start of SVN_COMMIT (an SVNCommit)."""
3842 self
.revision
= svn_commit
.revnum
3844 # The start of a new commit typically looks like this:
3846 # Revision-number: 1
3847 # Prop-content-length: 129
3848 # Content-length: 129
3853 # Log message for revision 1.
3861 # 2003-04-22T22:57:58.132837Z
3864 # Notice that the length headers count everything -- not just the
3865 # length of the data but also the lengths of the lengths, including
3866 # the 'K ' or 'V ' prefixes.
3868 # The reason there are both Prop-content-length and Content-length
3869 # is that the former includes just props, while the latter includes
3870 # everything. That's the generic header form for any entity in a
3871 # dumpfile. But since revisions only have props, the two lengths
3872 # are always the same for revisions.
3874 # Calculate the output needed for the property definitions.
3875 props
= svn_commit
.get_revprops()
3876 prop_names
= props
.keys()
3879 for propname
in prop_names
:
3880 if props
[propname
] is not None:
3881 prop_strings
.append(self
._string
_for
_prop
(propname
, props
[propname
]))
3883 all_prop_strings
= ''.join(prop_strings
) + 'PROPS-END\n'
3884 total_len
= len(all_prop_strings
)
3886 # Print the revision header and props
3887 self
.dumpfile
.write('Revision-number: %d\n'
3888 'Prop-content-length: %d\n'
3889 'Content-length: %d\n'
3891 % (self
.revision
, total_len
, total_len
))
3893 self
.dumpfile
.write(all_prop_strings
)
3894 self
.dumpfile
.write('\n')
3896 def mkdir(self
, path
):
3897 """Emit the creation of directory PATH."""
3898 self
.dumpfile
.write("Node-path: %s\n"
3900 "Node-action: add\n"
3902 "\n" % self
._utf
8_path
(path
))
3904 def _add_or_change_path(self
, s_item
, op
):
3905 """Emit the addition or change corresponding to S_ITEM.
3906 OP is either the constant OP_ADD or OP_CHANGE."""
3911 elif op
== OP_CHANGE
:
3914 raise FatalError("_add_or_change_path() called with bad op ('%s')"
3917 # Convenience variables
3918 c_rev
= s_item
.c_rev
3920 # The property handling here takes advantage of an undocumented
3921 # but IMHO consistent feature of the Subversion dumpfile-loading
3922 # code. When a node's properties aren't mentioned (that is, the
3923 # "Prop-content-length:" header is absent, no properties are
3924 # listed at all, and there is no "PROPS-END\n" line) then no
3925 # change is made to the node's properties.
3927 # This is consistent with the way dumpfiles behave w.r.t. text
3928 # content changes, so I'm comfortable relying on it. If you
3929 # commit a change to *just* the properties of some node that
3930 # already has text contents from a previous revision, then in the
3931 # dumpfile output for the prop change, no "Text-content-length:"
3932 # nor "Text-content-md5:" header will be present, and the text of
3933 # the file will not be given. But this does not cause the file's
3934 # text to be erased! It simply remains unchanged.
3936 # This works out great for cvs2svn, due to lucky coincidences:
3938 # For files, the only properties we ever set are set in the first
3939 # revision; all other revisions (including on branches) inherit
3940 # from that. After the first revision, we never change file
3941 # properties, therefore, there is no need to remember the full set
3942 # of properties on a given file once we've set it.
3944 # For directories, the only property we set is "svn:ignore", and
3945 # while we may change it after the first revision, we always do so
3946 # based on the contents of a ".cvsignore" file -- in other words,
3947 # CVS is doing the remembering for us, so we still don't have to
3948 # preserve the previous value of the property ourselves.
3950 # Calculate the (sorted-by-name) property string and length, if any.
3951 if s_item
.svn_props_changed
:
3952 svn_props
= s_item
.svn_props
3954 prop_names
= svn_props
.keys()
3956 for pname
in prop_names
:
3957 pvalue
= svn_props
[pname
]
3958 if pvalue
is not None:
3959 prop_contents
+= self
._string
_for
_prop
(pname
, pvalue
)
3960 prop_contents
+= 'PROPS-END\n'
3961 props_header
= 'Prop-content-length: %d\n' % len(prop_contents
)
3966 # treat .cvsignore as a directory property
3967 dir_path
, basename
= os
.path
.split(c_rev
.svn_path
)
3968 if basename
== ".cvsignore":
3969 ignore_vals
= generate_ignores(c_rev
)
3970 ignore_contents
= '\n'.join(ignore_vals
)
3971 ignore_contents
= ('K 10\nsvn:ignore\nV %d\n%s\n' % \
3972 (len(ignore_contents
), ignore_contents
))
3973 ignore_contents
= ignore_contents
+ 'PROPS-END\n'
3974 ignore_len
= len(ignore_contents
)
3976 # write headers, then props
3977 self
.dumpfile
.write('Node-path: %s\n'
3979 'Node-action: change\n'
3980 'Prop-content-length: %d\n'
3981 'Content-length: %d\n'
3984 % (self
._utf
8_path
(dir_path
), ignore_len
,
3985 ignore_len
, ignore_contents
))
3987 # If the file has keywords, we must prevent CVS/RCS from expanding
3988 # the keywords because they must be unexpanded in the repository,
3989 # or Subversion will get confused.
3990 pipe_cmd
, pipe
= Ctx().cvs_repository
.get_co_pipe(
3991 c_rev
, suppress_keyword_substitution
=s_item
.has_keywords
)
3993 self
.dumpfile
.write('Node-path: %s\n'
3996 '%s' # no property header if no props
3997 'Text-content-length: '
3998 % (self
._utf
8_path
(c_rev
.svn_path
),
3999 action
, props_header
))
4001 pos
= self
.dumpfile
.tell()
4003 self
.dumpfile
.write('0000000000000000\n'
4004 'Text-content-md5: 00000000000000000000000000000000\n'
4005 'Content-length: 0000000000000000\n'
4009 self
.dumpfile
.write(prop_contents
)
4011 # Insert a filter to convert all EOLs to LFs if neccessary
4012 if s_item
.needs_eol_filter
:
4013 data_reader
= LF_EOL_Filter(pipe
.stdout
)
4015 data_reader
= pipe
.stdout
4017 # Insert the rev contents, calculating length and checksum as we go.
4018 checksum
= md5
.new()
4021 buf
= data_reader
.read(PIPE_READ_SIZE
)
4024 checksum
.update(buf
)
4025 length
= length
+ len(buf
)
4026 self
.dumpfile
.write(buf
)
4029 error_output
= pipe
.stderr
.read()
4030 exit_status
= pipe
.wait()
4032 raise FatalError("The command '%s' failed with exit status: %s\n"
4033 "and the following output:\n"
4034 "%s" % (pipe_cmd
, exit_status
, error_output
))
4036 # Go back to patch up the length and checksum headers:
4037 self
.dumpfile
.seek(pos
, 0)
4038 # We left 16 zeros for the text length; replace them with the real
4039 # length, padded on the left with spaces:
4040 self
.dumpfile
.write('%16d' % length
)
4041 # 16... + 1 newline + len('Text-content-md5: ') == 35
4042 self
.dumpfile
.seek(pos
+ 35, 0)
4043 self
.dumpfile
.write(checksum
.hexdigest())
4044 # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
4045 self
.dumpfile
.seek(pos
+ 84, 0)
4046 # The content length is the length of property data, text data,
4047 # and any metadata around/inside around them.
4048 self
.dumpfile
.write('%16d' % (length
+ len(prop_contents
)))
4049 # Jump back to the end of the stream
4050 self
.dumpfile
.seek(0, 2)
4052 # This record is done (write two newlines -- one to terminate
4053 # contents that weren't themselves newline-termination, one to
4054 # provide a blank line for readability.
4055 self
.dumpfile
.write('\n\n')
4057 def add_path(self
, s_item
):
4058 """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
4059 self
._add
_or
_change
_path
(s_item
, OP_ADD
)
4061 def change_path(self
, s_item
):
4062 """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
4063 self
._add
_or
_change
_path
(s_item
, OP_CHANGE
)
4065 def delete_path(self
, path
):
4066 """Emit the deletion of PATH."""
4067 self
.dumpfile
.write('Node-path: %s\n'
4068 'Node-action: delete\n'
4069 '\n' % self
._utf
8_path
(path
))
4071 def copy_path(self
, src_path
, dest_path
, src_revnum
):
4072 """Emit the copying of SRC_PATH at SRC_REV to DEST_PATH."""
4073 # We don't need to include "Node-kind:" for copies; the loader
4074 # ignores it anyway and just uses the source kind instead.
4075 self
.dumpfile
.write('Node-path: %s\n'
4076 'Node-action: add\n'
4077 'Node-copyfrom-rev: %d\n'
4078 'Node-copyfrom-path: /%s\n'
4080 % (self
._utf
8_path
(dest_path
),
4082 self
._utf
8_path
(src_path
)))
4085 """Perform any cleanup necessary after all revisions have been
4087 self
.dumpfile
.close()
class RepositoryDelegate(DumpfileDelegate):
  """Creates a new Subversion Repository.  DumpfileDelegate does all
  of the heavy lifting."""

  def __init__(self):
    # NOTE(review): the "def __init__" line and the run_command argument
    # continuation lines were lost in extraction; the svnadmin argument
    # strings are reconstructed -- diff against upstream.
    self.svnadmin = Ctx().svnadmin
    self.target = Ctx().target
    if not Ctx().existing_svnrepos:
      Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
      if not Ctx().fs_type:
        # User didn't say what kind repository (bdb, fsfs, etc).
        # We still pass --bdb-txn-nosync.  It's a no-op if the default
        # repository type doesn't support it, but we definitely want
        # it if BDB is the default.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--bdb-txn-nosync",
                                           self.target))
      elif Ctx().fs_type == 'bdb':
        # User explicitly specified bdb.
        #
        # Since this is a BDB repository, pass --bdb-txn-nosync,
        # because it gives us a 4-5x speed boost (if cvs2svn is
        # creating the repository, cvs2svn should be the only program
        # accessing the svn repository (until cvs is done, at least)).
        # But we'll turn no-sync off in self.finish(), unless
        # instructed otherwise.
        run_command('%s create %s %s "%s"' % (self.svnadmin,
                                              "--fs-type=bdb",
                                              "--bdb-txn-nosync",
                                              self.target))
      else:
        # User specified something other than bdb.
        run_command('%s create %s "%s"' % (self.svnadmin,
                                           "--fs-type=%s" % Ctx().fs_type,
                                           self.target))

    # Since the output of this run is a repository, not a dumpfile,
    # the temporary dumpfiles we create should go in the tmpdir.
    DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

    # This is 1 if a commit is in progress, otherwise None.
    self._commit_in_progress = None

    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
                                     self.target ], True)
    self.loader_pipe.stdout.close()
    try:
      self._write_dumpfile_header(self.loader_pipe.stdin)
    except IOError:
      raise FatalError("svnadmin failed with the following output while "
                       "loading the dumpfile:\n"
                       + self.loader_pipe.stderr.read())
4143 def _feed_pipe(self
):
4144 """Feed the revision stored in the dumpfile to the svnadmin
4146 self
.dumpfile
.seek(0)
4148 data
= self
.dumpfile
.read(128*1024) # Chunk size is arbitrary
4152 self
.loader_pipe
.stdin
.write(data
)
4154 raise FatalError("svnadmin failed with the following output "
4155 "while loading the dumpfile:\n"
4156 + self
.loader_pipe
.stderr
.read())
4158 def start_commit(self
, svn_commit
):
4159 """Start a new commit. If a commit is already in progress, close
4160 the dumpfile, load it into the svn repository, open a new
4161 dumpfile, and write the header into it."""
4162 if self
._commit
_in
_progress
:
4164 self
.dumpfile
.seek(0)
4165 self
.dumpfile
.truncate()
4166 DumpfileDelegate
.start_commit(self
, svn_commit
)
4167 self
._commit
_in
_progress
= 1
4170 """Loads the last commit into the repository."""
4172 self
.dumpfile
.close()
4173 self
.loader_pipe
.stdin
.close()
4174 error_output
= self
.loader_pipe
.stderr
.read()
4175 exit_status
= self
.loader_pipe
.wait()
4177 raise FatalError('svnadmin load failed with exit status: %s\n'
4178 'and the following output:\n'
4179 '%s' % (exit_status
, error_output
,))
4180 os
.remove(self
.dumpfile_path
)
4182 # If this is a BDB repository, and we created the repository, and
4183 # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
4184 # line in the DB_CONFIG file, because txn syncing should be on by
4185 # default in BDB repositories.
4187 # We determine if this is a BDB repository by looking for the
4188 # DB_CONFIG file, which doesn't exist in FSFS, rather than by
4189 # checking Ctx().fs_type. That way this code will Do The Right
4190 # Thing in all circumstances.
4191 db_config
= os
.path
.join(self
.target
, "db/DB_CONFIG")
4192 if (not Ctx().existing_svnrepos
and not Ctx().bdb_txn_nosync
4193 and os
.path
.exists(db_config
)):
4194 no_sync
= 'set_flags DB_TXN_NOSYNC\n'
4196 contents
= open(db_config
, 'r').readlines()
4197 index
= contents
.index(no_sync
)
4198 contents
[index
] = '# ' + no_sync
4199 contents
= open(db_config
, 'w').writelines(contents
)
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """Makes no changes to the disk, but writes out information to
  STDOUT about what the SVNRepositoryMirror is doing.  Of course, our
  print statements will state that we're doing something, when in
  reality, we aren't doing anything other than printing out that we're
  doing something.  Kind of zen, really.

  NOTE(review): the extraction collapsed runs of spaces, so the exact
  leading padding of the log labels below is reconstructed -- confirm
  against upstream."""

  def __init__(self, total_revs):
    # Total number of Subversion revisions, used for progress reporting.
    self.total_revs = total_revs

  def start_commit(self, svn_commit):
    """Prints out the Subversion revision number of the commit that is
    being started."""
    Log().write(LOG_VERBOSE, "=" * 60)
    Log().write(LOG_NORMAL, "Starting Subversion r%d / %d" %
                (svn_commit.revnum, self.total_revs))

  def mkdir(self, path):
    """Print a line stating that we are creating directory PATH."""
    Log().write(LOG_VERBOSE, "  New Directory", path)

  def add_path(self, s_item):
    """Print a line stating that we are 'adding' s_item.c_rev.svn_path."""
    Log().write(LOG_VERBOSE, "  Adding", s_item.c_rev.svn_path)

  def change_path(self, s_item):
    """Print a line stating that we are 'changing' s_item.c_rev.svn_path."""
    Log().write(LOG_VERBOSE, "  Changing", s_item.c_rev.svn_path)

  def delete_path(self, path):
    """Print a line stating that we are 'deleting' PATH."""
    Log().write(LOG_VERBOSE, "  Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Print a line stating that we are 'copying' revision SRC_REVNUM
    of SRC_PATH to DEST_PATH."""
    Log().write(LOG_VERBOSE, "  Copying revision", src_revnum, "of", src_path)
    Log().write(LOG_VERBOSE, "  to", dest_path)

  def finish(self):
    """State that we are done creating our repository."""
    Log().write(LOG_VERBOSE, "Finished creating Subversion repository.")
    Log().write(LOG_QUIET, "Done.")
def pass1():
  """Pass 1: walk the CVS repository, feeding every ,v file through the
  RCS parser into a CollectData instance, then archive the collected
  statistics.

  NOTE(review): the "def" line and several structural lines of this
  function were lost in extraction; the nesting of visit_file and the
  placement of OS_SEP_PLUS_ATTIC are reconstructed -- diff against the
  upstream file."""
  OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    # Callback for os.path.walk; BATON is unused because CD is closed over.
    for fname in files:
      if fname[-2:] != ',v':
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname[-6:] == OS_SEP_PLUS_ATTIC:
        # drop the 'Attic' portion from the pathname for the canonical name.
        cd.set_fname(os.path.join(dirname[:-6], fname), pathname)
      else:
        # If this file also exists in the attic, it's a fatal error
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        cd.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)
      try:
        cvs2svn_rcsparse.parse(open(pathname, 'rb'), cd)
      except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
              RuntimeError):
        err = "%s: '%s' is not a valid ,v file" \
              % (error_prefix, pathname)
        sys.stderr.write(err + '\n')
        cd.fatal_errors.append(err)
      except:
        # Anything else is unexpected: report which file broke us, then
        # re-raise so the traceback is not lost.
        Log().write(LOG_WARN,
                    "Exception occurred while parsing %s" % pathname)
        raise

  os.path.walk(Ctx().project.project_cvs_repos_path, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if len(cd.fatal_errors) > 0:
    raise FatalException("Pass 1 complete.\n"
                         + "Error summary:\n"
                         + "\n".join(cd.fatal_errors) + "\n"
                         + "Exited due to fatal error(s).\n")

  if cd.found_valid_file is None:
    raise FatalException(
        "\n"
        "No RCS files found in your CVS Repository!\n"
        "Are you absolutely certain you are pointing cvs2svn\n"
        "at a CVS repository?\n"
        "\n"
        "Exited due to fatal error(s).\n")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
4309 "Pass 2: clean up the revision information."
4311 symbol_db
= SymbolDatabase()
4314 # Convert the list of regexps to a list of strings
4315 excludes
= symbol_db
.find_excluded_symbols(Ctx().excludes
)
4319 Log().write(LOG_QUIET
, "Checking for blocked exclusions...")
4320 blocked_excludes
= symbol_db
.find_blocked_excludes(excludes
)
4321 if blocked_excludes
:
4322 for branch
, blockers
in blocked_excludes
.items():
4323 sys
.stderr
.write(error_prefix
+ ": The branch '%s' cannot be "
4324 "excluded because the following symbols depend "
4325 "on it:\n" % (branch
))
4326 for blocker
in blockers
:
4327 sys
.stderr
.write(" '%s'\n" % (blocker
))
4328 sys
.stderr
.write("\n")
4331 Log().write(LOG_QUIET
, "Checking for forced tags with commits...")
4332 invalid_forced_tags
= [ ]
4333 for forced_tag
in Ctx().forced_tags
:
4334 if excludes
.has_key(forced_tag
):
4336 if symbol_db
.branch_has_commit(forced_tag
):
4337 invalid_forced_tags
.append(forced_tag
)
4338 if invalid_forced_tags
:
4339 sys
.stderr
.write(error_prefix
+ ": The following branches cannot be "
4340 "forced to be tags because they have commits:\n")
4341 for tag
in invalid_forced_tags
:
4342 sys
.stderr
.write(" '%s'\n" % (tag
))
4343 sys
.stderr
.write("\n")
4346 Log().write(LOG_QUIET
, "Checking for tag/branch mismatches...")
4347 mismatches
= symbol_db
.find_mismatches(excludes
)
4348 def is_not_forced(mismatch
):
4350 return not (name
in Ctx().forced_tags
or name
in Ctx().forced_branches
)
4351 mismatches
= filter(is_not_forced
, mismatches
)
4353 sys
.stderr
.write(error_prefix
+ ": The following symbols are tags "
4354 "in some files and branches in others.\nUse "
4355 "--force-tag, --force-branch and/or --exclude to "
4356 "resolve the symbols.\n")
4357 for name
, tag_count
, branch_count
, commit_count
in mismatches
:
4358 sys
.stderr
.write(" '%s' is a tag in %d files, a branch in "
4359 "%d files and has commits in %d files.\n"
4360 % (name
, tag_count
, branch_count
, commit_count
))
4363 # Bail out now if we found errors
4367 # Create the tags database
4368 tags_db
= TagsDatabase(DB_OPEN_NEW
)
4369 for tag
in symbol_db
.tags
:
4370 if tag
not in Ctx().forced_branches
:
4372 for tag
in Ctx().forced_tags
:
4375 Log().write(LOG_QUIET
, "Re-synchronizing CVS revision timestamps...")
4377 # We may have recorded some changes in revisions' timestamp. We need to
4378 # scan for any other files which may have had the same log message and
4379 # occurred at "the same time" and change their timestamps, too.
4381 # read the resync data file
4382 def read_resync(fname
):
4383 "Read the .resync file into memory."
4385 ### note that we assume that we can hold the entire resync file in
4386 ### memory. really large repositories with whacky timestamps could
4387 ### bust this assumption. should that ever happen, then it is possible
4388 ### to split the resync file into pieces and make multiple passes,
4389 ### using each piece.
4392 # A digest maps to a sequence of lists which specify a lower and upper
4393 # time bound for matching up the commit. We keep a sequence of these
4394 # because a number of checkins with the same log message (e.g. an empty
4395 # log message) could need to be remapped. We also make them a list
4396 # because we will dynamically expand the lower/upper bound as we find
4397 # commits that fall into a particular msg and time range.
4399 # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
4403 for line
in fileinput
.FileInput(fname
):
4404 t1
= int(line
[:8], 16)
4405 digest
= line
[9:DIGEST_END_IDX
]
4406 t2
= int(line
[DIGEST_END_IDX
+1:], 16)
4407 t1_l
= t1
- COMMIT_THRESHOLD
/2
4408 t1_u
= t1
+ COMMIT_THRESHOLD
/2
4409 resync
.setdefault(digest
, []).append([t1_l
, t1_u
, t2
])
4411 # For each digest, sort the resync items in it in increasing order,
4412 # based on the lower time bound.
4413 for val
in resync
.values():
4418 resync
= read_resync(temp(DATAFILE
+ RESYNC_SUFFIX
))
4420 output
= open(temp(DATAFILE
+ CLEAN_REVS_SUFFIX
), 'w')
4421 Cleanup().register(temp(DATAFILE
+ CLEAN_REVS_SUFFIX
), pass3
)
4423 tweaked_timestamps_db
= Database(temp(TWEAKED_TIMESTAMPS_DB
), DB_OPEN_NEW
)
4424 Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB
), pass2
)
4426 # process the revisions file, looking for items to clean up
4427 for line
in fileinput
.FileInput(temp(DATAFILE
+ REVS_SUFFIX
)):
4428 c_rev
= CVSRevision(Ctx(), line
[:-1])
4430 # Skip this entire revision if it's on an excluded branch
4431 if excludes
.has_key(c_rev
.branch_name
):
4435 if c_rev
.prev_rev
is not None:
4436 new_prev_ts
= tweaked_timestamps_db
.get(
4437 c_rev
.unique_key(c_rev
.prev_rev
), None)
4439 c_rev
.prev_timestamp
= new_prev_ts
4442 if c_rev
.next_rev
is not None:
4443 new_next_ts
= tweaked_timestamps_db
.get(
4444 c_rev
.unique_key(c_rev
.next_rev
), None)
4446 c_rev
.next_timestamp
= new_next_ts
4448 # Remove all references to excluded tags and branches
4449 def not_excluded(symbol
, excludes
=excludes
):
4450 return not excludes
.has_key(symbol
)
4451 c_rev
.branches
= filter(not_excluded
, c_rev
.branches
)
4452 c_rev
.tags
= filter(not_excluded
, c_rev
.tags
)
4454 # Convert all branches that are forced to be tags
4455 for forced_tag
in Ctx().forced_tags
:
4456 if forced_tag
in c_rev
.branches
:
4457 c_rev
.branches
.remove(forced_tag
)
4458 c_rev
.tags
.append(forced_tag
)
4460 # Convert all tags that are forced to be branches
4461 for forced_branch
in Ctx().forced_branches
:
4462 if forced_branch
in c_rev
.tags
:
4463 c_rev
.tags
.remove(forced_branch
)
4464 c_rev
.branches
.append(forced_branch
)
4466 # see if this is "near" any of the resync records we
4467 # have recorded for this digest [of the log message].
4468 for record
in resync
.get(c_rev
.digest
, []):
4469 if record
[2] == c_rev
.timestamp
:
4470 # This means that either c_rev is the same revision that
4471 # caused the resync record to exist, or c_rev is a different
4472 # CVS revision that happens to have the same timestamp. In
4473 # either case, we don't have to do anything, so we...
4476 if record
[0] <= c_rev
.timestamp
<= record
[1]:
4477 # bingo! We probably want to remap the time on this c_rev,
4478 # unless the remapping would be useless because the new time
4479 # would fall outside the COMMIT_THRESHOLD window for this
4481 new_timestamp
= record
[2]
4482 # If the new timestamp is earlier than that of our previous revision
4483 if new_timestamp
< c_rev
.prev_timestamp
:
4484 desc
= ("%s: Attempt to set timestamp of revision %s on file %s"
4485 + " to time %s, which is before previous the time of"
4486 + " revision %s (%s):")
4487 Log().write(LOG_WARN
, desc
% (warning_prefix
, c_rev
.rev
,
4488 c_rev
.cvs_path
, new_timestamp
,
4489 c_rev
.prev_rev
, c_rev
.prev_timestamp
))
4490 # If resyncing our rev to c_rev.prev_timestamp + 1 will place
4491 # the timestamp of c_rev within COMMIT_THRESHOLD of the
4492 # attempted resync time, then sync back to c_rev.prev_timestamp
4494 if ((c_rev
.prev_timestamp
+ 1) - new_timestamp
) < COMMIT_THRESHOLD
:
4495 new_timestamp
= c_rev
.prev_timestamp
+ 1
4496 Log().write(LOG_WARN
, "%s: Time set to %s" % (warning_prefix
,
4499 Log().write(LOG_WARN
, "%s: Timestamp left untouched" %
4503 # If the new timestamp is later than that of our next revision
4504 elif c_rev
.next_timestamp
and new_timestamp
> c_rev
.next_timestamp
:
4505 desc
= ("%s: Attempt to set timestamp of revision %s on file %s"
4506 + " to time %s, which is after time of next"
4507 + " revision %s (%s):")
4508 Log().write(LOG_WARN
, desc
% (warning_prefix
, c_rev
.rev
,
4509 c_rev
.cvs_path
, new_timestamp
,
4510 c_rev
.prev_rev
, c_rev
.next_timestamp
))
4511 # If resyncing our rev to c_rev.next_timestamp - 1 will place
4512 # the timestamp of c_rev within COMMIT_THRESHOLD of the
4513 # attempted resync time, then sync forward to c_rev.next_timestamp
4515 if (new_timestamp
- (c_rev
.next_timestamp
- 1)) < COMMIT_THRESHOLD
:
4516 new_timestamp
= c_rev
.next_timestamp
- 1
4517 Log().write(LOG_WARN
, "%s: Time set to %s" % (warning_prefix
,
4520 Log().write(LOG_WARN
, "%s: Timestamp left untouched" %
4524 # Fix for Issue #71: Avoid resyncing two consecutive revisions
4525 # to the same timestamp.
4526 elif (new_timestamp
== c_rev
.prev_timestamp
4527 or new_timestamp
== c_rev
.next_timestamp
):
4530 # adjust the time range. we want the COMMIT_THRESHOLD from the
4531 # bounds of the earlier/latest commit in this group.
4532 record
[0] = min(record
[0], c_rev
.timestamp
- COMMIT_THRESHOLD
/2)
4533 record
[1] = max(record
[1], c_rev
.timestamp
+ COMMIT_THRESHOLD
/2)
4535 msg
= "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
4536 % (c_rev
.cvs_path
, c_rev
.rev
, time
.ctime(c_rev
.timestamp
),
4537 new_timestamp
- c_rev
.timestamp
)
4538 Log().write(LOG_VERBOSE
, msg
)
4540 c_rev
.timestamp
= new_timestamp
4541 tweaked_timestamps_db
[c_rev
.unique_key()] = new_timestamp
4543 # stop looking for hits
4546 output
.write(str(c_rev
) + "\n")
4547 Log().write(LOG_QUIET
, "Done")
4550 Log().write(LOG_QUIET
, "Sorting CVS revisions...")
4551 sort_file(temp(DATAFILE
+ CLEAN_REVS_SUFFIX
),
4552 temp(DATAFILE
+ SORTED_REVS_SUFFIX
))
4553 Cleanup().register(temp(DATAFILE
+ SORTED_REVS_SUFFIX
), pass5
)
4554 Log().write(LOG_QUIET
, "Done")
4557 """Iterate through sorted revs, storing them in a database.
4558 If we're not doing a trunk-only conversion, generate the
4559 LastSymbolicNameDatabase, which contains the last CVSRevision
4560 that is a source for each tag or branch.
4562 Log().write(LOG_QUIET
,
4563 "Copying CVS revision data from flat file to database...")
4564 cvs_revs_db
= CVSRevisionDatabase(DB_OPEN_NEW
)
4565 if not Ctx().trunk_only
:
4566 Log().write(LOG_QUIET
,
4567 "Finding last CVS revisions for all symbolic names...")
4568 last_sym_name_db
= LastSymbolicNameDatabase(DB_OPEN_NEW
)
4570 # This is to avoid testing Ctx().trunk_only every time around the loop
def noop(*args):
  """Accept any arguments and do nothing.

  Substituted for the real symbol-tracking callables when the
  conversion is trunk-only, so the per-revision loop does not have to
  re-test Ctx().trunk_only on every iteration.
  """
  pass
4574 create_database
= noop
4575 last_sym_name_db
= DummyLSNDB()
4577 for line
in fileinput
.FileInput(temp(DATAFILE
+ SORTED_REVS_SUFFIX
)):
4578 c_rev
= CVSRevision(Ctx(), line
[:-1])
4579 cvs_revs_db
.log_revision(c_rev
)
4580 last_sym_name_db
.log_revision(c_rev
)
4581 StatsKeeper().record_c_rev(c_rev
)
4583 last_sym_name_db
.create_database()
4584 StatsKeeper().archive()
4585 Log().write(LOG_QUIET
, "Done")
4589 Generate the SVNCommit <-> CVSRevision mapping
4590 databases. CVSCommit._commit also calls SymbolingsLogger to register
4591 CVSRevisions that represent an opening or closing for a path on a
4592 branch or tag. See SymbolingsLogger for more details.
4594 Log().write(LOG_QUIET
, "Mapping CVS revisions to Subversion commits...")
4596 aggregator
= CVSRevisionAggregator()
4597 for line
in fileinput
.FileInput(temp(DATAFILE
+ SORTED_REVS_SUFFIX
)):
4598 c_rev
= CVSRevision(Ctx(), line
[:-1])
4599 if not (Ctx().trunk_only
and c_rev
.branch_name
is not None):
4600 aggregator
.process_revision(c_rev
)
4603 StatsKeeper().set_svn_rev_count(SVNCommit
.revnum
- 1)
4604 StatsKeeper().archive()
4605 Log().write(LOG_QUIET
, "Done")
4608 Log().write(LOG_QUIET
, "Sorting symbolic name source revisions...")
4610 if not Ctx().trunk_only
:
4611 sort_file(temp(SYMBOL_OPENINGS_CLOSINGS
),
4612 temp(SYMBOL_OPENINGS_CLOSINGS_SORTED
))
4613 Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED
), pass8
)
4614 Log().write(LOG_QUIET
, "Done")
4617 Log().write(LOG_QUIET
, "Determining offsets for all symbolic names...")
4619 def generate_offsets_for_symbolings():
4620 """This function iterates through all the lines in
4621 SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
4622 SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
4623 where SYMBOLIC_NAME is first encountered. This will allow us to
4624 seek to the various offsets in the file and sequentially read only
4625 the openings and closings that we need."""
4627 ###PERF This is a fine example of a db that can be in-memory and
4628 #just flushed to disk when we're done. Later, it can just be sucked
4630 offsets_db
= Database(temp(SYMBOL_OFFSETS_DB
), DB_OPEN_NEW
)
4631 Cleanup().register(temp(SYMBOL_OFFSETS_DB
), pass8
)
4633 file = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED
), 'r')
4637 line
= file.readline()
4640 sym
, svn_revnum
, cvs_rev_key
= line
.split(" ", 2)
4642 Log().write(LOG_VERBOSE
, " ", sym
)
4644 offsets_db
[sym
] = fpos
4646 if not Ctx().trunk_only
:
4647 generate_offsets_for_symbolings()
4648 Log().write(LOG_QUIET
, "Done.")
4651 svncounter
= 2 # Repository initialization is 1.
4652 repos
= SVNRepositoryMirror()
4653 persistence_manager
= PersistenceManager(DB_OPEN_READ
)
4656 if not Ctx().dry_run
:
4657 repos
.add_delegate(RepositoryDelegate())
4658 Log().write(LOG_QUIET
, "Starting Subversion Repository.")
4660 if not Ctx().dry_run
:
4661 repos
.add_delegate(DumpfileDelegate())
4662 Log().write(LOG_QUIET
, "Starting Subversion Dumpfile.")
4664 repos
.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))
4667 svn_commit
= persistence_manager
.get_svn_commit(svncounter
)
4670 repos
.commit(svn_commit
)
4688 """Session state for this run of cvs2svn. For example, run-time
4689 options are stored here. This class is a Borg, see
4690 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.
4692 __shared_state
= { }
4694 self
.__dict
__ = self
.__shared
_state
4697 # Else, initialize to defaults.
4699 self
.dumpfile
= DUMPFILE
4704 self
.existing_svnrepos
= 0
4708 self
.trunk_base
= "trunk"
4709 self
.tags_base
= "tags"
4710 self
.branches_base
= "branches"
4711 self
.encoding
= ["ascii"]
4712 self
.mime_types_file
= None
4713 self
.auto_props_file
= None
4714 self
.auto_props_ignore_case
= False
4715 self
.no_default_eol
= 0
4716 self
.eol_from_mime_type
= 0
4717 self
.keywords_off
= 0
4719 self
.svnadmin
= "svnadmin"
4720 self
.username
= None
4722 self
.skip_cleanup
= 0
4723 self
.bdb_txn_nosync
= 0
4725 self
.forced_branches
= []
4726 self
.forced_tags
= []
4728 self
.symbol_transforms
= []
4729 self
.svn_property_setters
= []
class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the originating CVS revision number on each item.

  The revision number is stored in the 'cvs2svn:cvs-rev' svn property,
  and the property set is flagged as changed.
  """

  def set_properties(self, s_item):
    # Bind the property mapping once, then record the CVS revision and
    # note that the properties were modified.
    props = s_item.svn_props
    props['cvs2svn:cvs-rev'] = s_item.c_rev.rev
    s_item.svn_props_changed = True
4740 class MimeMapper(SVNPropertySetter
):
4741 """A class that provides mappings from file names to MIME types."""
4743 def __init__(self
, mime_types_file
):
4746 for line
in fileinput
.input(mime_types_file
):
4747 if line
.startswith("#"):
4750 # format of a line is something like
4751 # text/plain c h cpp
4752 extensions
= line
.split()
4753 if len(extensions
) < 2:
4755 type = extensions
.pop(0)
4756 for ext
in extensions
:
4757 if self
.mappings
.has_key(ext
) and self
.mappings
[ext
] != type:
4758 sys
.stderr
.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
4759 % (warning_prefix
, ext
, self
.mappings
[ext
], type))
4760 self
.mappings
[ext
] = type
4762 def set_properties(self
, s_item
):
4763 basename
, extension
= os
.path
.splitext(
4764 os
.path
.basename(s_item
.c_rev
.cvs_path
)
4767 # Extension includes the dot, so strip it (will leave extension
4768 # empty if filename ends with a dot, which is ok):
4769 extension
= extension
[1:]
4771 # If there is no extension (or the file ends with a period), use
4772 # the base name for mapping. This allows us to set mappings for
4773 # files such as README or Makefile:
4775 extension
= basename
4777 mime_type
= self
.mappings
.get(extension
, None)
4778 if mime_type
is not None:
4779 s_item
.svn_props
['svn:mime-type'] = mime_type
4782 class AutoPropsPropertySetter(SVNPropertySetter
):
4783 """Set arbitrary svn properties based on an auto-props configuration.
4785 This class always supports case-sensitive and case-insensitive
4786 pattern matching. The 'correct' behavior is not quite clear,
4787 because subversion itself does an inconsistent job of handling case
4788 in auto-props patterns; see
4789 http://subversion.tigris.org/issues/show_bug.cgi?id=2036."""
4792 """Describes the properties to be set for files matching a pattern."""
4793 def __init__(self
, pattern
, propdict
):
4794 # A glob-like pattern:
4795 self
.pattern
= pattern
4796 # A dictionary of properties that should be set:
4797 self
.propdict
= propdict
4799 def match(self
, basename
):
4800 """Does the file with the specified basename match pattern?"""
4801 return fnmatch
.fnmatch(basename
, self
.pattern
)
4803 def __init__(self
, configfilename
, ignore_case
):
4804 config
= ConfigParser
.ConfigParser()
4806 self
.transform_case
= self
.squash_case
4808 config
.optionxform
= self
.preserve_case
4809 self
.transform_case
= self
.preserve_case
4811 config
.readfp(file(configfilename
))
4813 for section
in config
.sections():
4814 if self
.transform_case(section
) == 'auto-props':
4815 for (pattern
, value
) in config
.items(section
):
4817 self
._add
_pattern
(pattern
, value
)
4819 def squash_case(self
, s
):
4822 def preserve_case(self
, s
):
4825 def _add_pattern(self
, pattern
, value
):
4826 props
= value
.split(';')
4829 s
= prop
.split('=', 1)
4831 propdict
[s
[0]] = None
4833 propdict
[s
[0]] = s
[1]
4834 self
.patterns
.append(
4835 self
.Pattern(self
.transform_case(pattern
), propdict
))
4837 def get_propdict(self
, path
):
4838 basename
= self
.transform_case(os
.path
.basename(path
))
4840 for pattern
in self
.patterns
:
4841 if pattern
.match(basename
):
4842 for (key
,value
) in pattern
.propdict
.items():
4843 if propdict
.has_key(key
):
4844 if propdict
[key
] != value
:
4847 "Contradictory values set for property '%s' for file %s."
4850 propdict
[key
] = value
4852 print 'propdict %s -> %s' % (path
, propdict
,) ###
4855 def set_properties(self
, s_item
):
4856 propdict
= self
.get_propdict(s_item
.c_rev
.cvs_path
)
4857 for (k
,v
) in propdict
.items():
4858 if s_item
.svn_props
.has_key(k
):
4859 if s_item
.svn_props
[k
] != v
:
4862 "Property '%s' already set for file %s."
4863 % (k
, s_item
.c_rev
.cvs_path
,))
4865 s_item
.svn_props
[k
] = v
class BinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """Supply a fallback mime type for binary files.

  If no svn:mime-type was determined by an earlier setter and the file
  was checked in with mode 'b', default the mime type to
  'application/octet-stream'.
  """

  def set_properties(self, s_item):
    # Only act when no earlier setter already chose a mime type.
    already_typed = s_item.svn_props.has_key('svn:mime-type')
    if not already_typed and s_item.c_rev.mode == 'b':
      s_item.svn_props['svn:mime-type'] = 'application/octet-stream'
class BinaryFileEOLStyleSetter(SVNPropertySetter):
  """Clear the eol-style for binary files.

  Items whose CVS mode is 'b' get svn:eol-style explicitly set to
  None; all other items are left untouched.
  """

  def set_properties(self, s_item):
    # Guard clause: non-binary files are not our concern.
    if s_item.c_rev.mode != 'b':
      return
    s_item.svn_props['svn:eol-style'] = None
4885 class EOLStyleFromMimeTypeSetter(SVNPropertySetter
):
4886 """Set the eol-style from the mime type if it is not already known.
4888 This setting is influenced by the mime-type setting, which must
4889 already have been set. See also issue #39."""
4891 def set_properties(self
, s_item
):
4892 if not s_item
.svn_props
.has_key('svn:eol-style') \
4893 and s_item
.svn_props
.get('svn:mime-type', None) is not None:
4894 if s_item
.svn_props
['svn:mime-type'].startswith("text/"):
4895 s_item
.svn_props
['svn:eol-style'] = 'native'
4897 s_item
.svn_props
['svn:eol-style'] = None
4900 class DefaultEOLStyleSetter(SVNPropertySetter
):
4901 """Set the default eol-style if one has not already been set."""
4903 def __init__(self
, value
):
4904 """Initialize with the specified default VALUE."""
4908 def set_properties(self
, s_item
):
4909 if not s_item
.svn_props
.has_key('svn:eol-style'):
4910 s_item
.svn_props
['svn:eol-style'] = self
.value
4913 class KeywordsPropertySetter(SVNPropertySetter
):
4914 """Set the svn:keywords property based on the file's mode. See
4917 def __init__(self
, value
):
4918 """Use VALUE for the value of the svn:keywords property if it is
4923 def set_properties(self
, s_item
):
4924 if not s_item
.svn_props
.has_key('svn:keywords') \
4925 and s_item
.c_rev
.mode
in [None, 'kv', 'kvl']:
4926 s_item
.svn_props
['svn:keywords'] = self
.value
class ExecutablePropertySetter(SVNPropertySetter):
  """Set the svn:executable property based on c_rev.file_executable."""

  def set_properties(self, s_item):
    # '*' is the conventional value used to mark svn:executable.
    if s_item.c_rev.file_executable:
      s_item.svn_props['svn:executable'] = '*'
4937 def convert(start_pass
, end_pass
):
4938 "Convert a CVS repository to an SVN repository."
4941 times
= [ None ] * (end_pass
+ 1)
4942 times
[start_pass
- 1] = time
.time()
4943 StatsKeeper().set_start_time(time
.time())
4944 for i
in range(start_pass
- 1, end_pass
):
4945 Log().write(LOG_QUIET
, '----- pass %d -----' % (i
+ 1))
4947 times
[i
+ 1] = time
.time()
4948 StatsKeeper().log_duration_for_pass(times
[i
+ 1] - times
[i
], i
+ 1)
4949 # Dispose of items in Ctx() not intended to live past the end of the pass
4950 # (Identified by exactly one leading underscore)
4951 for attr
in dir(Ctx()):
4952 if (len(attr
) > 2 and attr
[0] == '_' and attr
[1] != '_'
4953 and attr
[:6] != "_Ctx__"):
4954 delattr(Ctx(), attr
)
4955 if not Ctx().skip_cleanup
:
4956 cleanup
.cleanup(_passes
[i
])
4957 StatsKeeper().set_end_time(time
.time())
4959 Log().write(LOG_QUIET
, StatsKeeper())
4961 Log().write(LOG_QUIET
,
4962 '(These are unaltered CVS repository stats and do not\n'
4963 ' reflect tags or branches excluded via --exclude)\n')
4964 Log().write(LOG_NORMAL
, StatsKeeper().timings())
4967 def normalize_ttb_path(opt
, path
):
4968 """Normalize a path to be used for --trunk, --tags, or --branches.
4970 1. Strip leading, trailing, and duplicated '/'.
4971 2. Verify that the path is not empty.
4973 Return the normalized path.
4975 If the path is invalid, write an error message and exit."""
4977 norm_path
= _path_join(*path
.split('/'))
4979 raise FatalError("cannot pass an empty path to %s." % (opt
,))
4983 def verify_paths_disjoint(*paths
):
4984 """Verify that all of the paths in the argument list are disjoint.
4986 If any of the paths is nested in another one (i.e., in the sense
4987 that 'a/b/c/d' is nested in 'a/b'), or any two paths are identical,
4988 write an error message and exit."""
4990 paths
= [(path
.split('/'), path
) for path
in paths
]
4991 # If all overlapping elements are equal, a shorter list is
4992 # considered "less than" a longer one. Therefore if any paths are
4993 # nested, this sort will leave at least one such pair adjacent, in
4994 # the order [nest,nestling].
4996 for i
in range(1, len(paths
)):
4997 split_path1
, path1
= paths
[i
- 1]
4998 split_path2
, path2
= paths
[i
]
4999 if len(split_path1
) <= len(split_path2
) \
5000 and split_path2
[:len(split_path1
)] == split_path1
:
5001 raise FatalError("paths %s and %s are not disjoint." % (path1
, path2
,))
5005 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
5006 % os
.path
.basename(sys
.argv
[0])
5007 print ' --help, -h print this usage message and exit with success'
5008 print ' --version print the version number'
5011 print ' -s PATH path for SVN repos'
5012 print ' -p START[:END] start at pass START, end at pass END of %d' \
5014 print ' If only START is given, run only pass START'
5015 print ' (implicitly enables --skip-cleanup)'
5016 print ' --existing-svnrepos load into existing SVN repository'
5017 print ' --dumpfile=PATH name of intermediate svn dumpfile'
5018 print ' --tmpdir=PATH directory to use for tmp data (default to cwd)'
5019 print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
5020 print ' --dry-run do not create a repository or a dumpfile;'
5021 print ' just print what would happen.'
5022 print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
5023 print ' (only use this if having problems with RCS)'
5024 print ' --svnadmin=PATH path to the svnadmin program'
5025 print ' --trunk-only convert only trunk commits, not tags nor branches'
5026 print ' --trunk=PATH path for trunk (default: %s)' \
5028 print ' --branches=PATH path for branches (default: %s)' \
5029 % Ctx().branches_base
5030 print ' --tags=PATH path for tags (default: %s)' \
5032 print ' --no-prune don\'t prune empty directories'
5033 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
5034 print ' --encoding=ENC encoding of paths and log messages in CVS repos'
5035 print ' Multiple of these options may be passed, where they'
5036 print ' will be treated as an ordered list of encodings to'
5037 print ' attempt (with "ascii" as a hardcoded last resort)'
5038 print ' --force-branch=NAME force NAME to be a branch'
5039 print ' --force-tag=NAME force NAME to be a tag'
5040 print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
5041 print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
5042 print ' use Python regexp and reference syntax respectively'
5043 print ' --username=NAME username for cvs2svn-synthesized commits'
5044 print ' --skip-cleanup prevent the deletion of intermediate files'
5045 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
5046 print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
5047 print ' --cvs-revnums record CVS revision numbers as file properties'
5048 print ' --auto-props=FILE set file properties from the auto-props section'
5049 print ' of a file in svn config format'
5050 print ' --auto-props-ignore-case Ignore case when matching auto-props patterns'
5051 print ' --mime-types=FILE specify an apache-style mime.types file for'
5052 print ' setting svn:mime-type'
5053 print ' --eol-from-mime-type set svn:eol-style from mime type if known'
5054 print ' --no-default-eol don\'t set svn:eol-style to \'native\' for'
5055 print ' non-binary files with undetermined mime types'
5056 print ' --keywords-off don\'t set svn:keywords on any files (by default,'
5057 print ' cvs2svn sets svn:keywords on non-binary files to'
5058 print ' "%s")' % SVN_KEYWORDS_VALUE
5061 # Convenience var, so we don't have to keep instantiating this Borg.
5066 end_pass
= len(_passes
)
5069 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'p:s:qvh',
5070 [ "help", "create", "trunk=",
5071 "username=", "existing-svnrepos",
5072 "branches=", "tags=", "encoding=",
5073 "force-branch=", "force-tag=", "exclude=",
5074 "use-cvs", "mime-types=",
5075 "auto-props=", "auto-props-ignore-case",
5076 "eol-from-mime-type", "no-default-eol",
5077 "trunk-only", "no-prune", "dry-run",
5078 "dump-only", "dumpfile=", "tmpdir=",
5079 "svnadmin=", "skip-cleanup", "cvs-revnums",
5080 "bdb-txn-nosync", "fs-type=",
5081 "version", "profile",
5082 "keywords-off", "symbol-transform="])
5083 except getopt
.GetoptError
, e
:
5084 sys
.stderr
.write(error_prefix
+ ': ' + str(e
) + '\n\n')
5088 for opt
, value
in opts
:
5089 if opt
== '--version':
5090 print '%s version %s' % (os
.path
.basename(sys
.argv
[0]), VERSION
)
5093 # Don't cleanup if we're doing incrementals.
5094 ctx
.skip_cleanup
= 1
5095 if value
.find(':') > 0:
5096 start_pass
, end_pass
= map(int, value
.split(':'))
5098 end_pass
= start_pass
= int(value
)
5099 if start_pass
> len(_passes
) or start_pass
< 1:
5101 'illegal value (%d) for starting pass. Must be 1 through %d.'
5102 % (int(start_pass
), len(_passes
),))
5103 if end_pass
< start_pass
or end_pass
> len(_passes
):
5105 'illegal value (%d) for ending pass. Must be %d through %d.'
5106 % (int(end_pass
), int(start_pass
), len(_passes
),))
5107 elif (opt
== '--help') or (opt
== '-h'):
5110 Log().log_level
= LOG_VERBOSE
5113 Log().log_level
= LOG_QUIET
5117 elif opt
== '--existing-svnrepos':
5118 ctx
.existing_svnrepos
= 1
5119 elif opt
== '--dumpfile':
5120 ctx
.dumpfile
= value
5121 elif opt
== '--tmpdir':
5123 elif opt
== '--use-cvs':
5125 elif opt
== '--svnadmin':
5126 ctx
.svnadmin
= value
5127 elif opt
== '--trunk-only':
5129 elif opt
== '--trunk':
5130 ctx
.trunk_base
= normalize_ttb_path(opt
, value
)
5131 elif opt
== '--branches':
5132 ctx
.branches_base
= normalize_ttb_path(opt
, value
)
5133 elif opt
== '--tags':
5134 ctx
.tags_base
= normalize_ttb_path(opt
, value
)
5135 elif opt
== '--no-prune':
5137 elif opt
== '--dump-only':
5139 elif opt
== '--dry-run':
5141 elif opt
== '--encoding':
5142 ctx
.encoding
.insert(-1, value
)
5143 elif opt
== '--force-branch':
5144 ctx
.forced_branches
.append(value
)
5145 elif opt
== '--force-tag':
5146 ctx
.forced_tags
.append(value
)
5147 elif opt
== '--exclude':
5149 ctx
.excludes
.append(re
.compile('^' + value
+ '$'))
5151 raise FatalError("'%s' is not a valid regexp." % (value
,))
5152 elif opt
== '--mime-types':
5153 ctx
.mime_types_file
= value
5154 elif opt
== '--auto-props':
5155 ctx
.auto_props_file
= value
5156 elif opt
== '--auto-props-ignore-case':
5157 ctx
.auto_props_ignore_case
= True
5158 elif opt
== '--eol-from-mime-type':
5159 ctx
.eol_from_mime_type
= 1
5160 elif opt
== '--no-default-eol':
5161 ctx
.no_default_eol
= 1
5162 elif opt
== '--keywords-off':
5163 ctx
.keywords_off
= 1
5164 elif opt
== '--username':
5165 ctx
.username
= value
5166 elif opt
== '--skip-cleanup':
5167 ctx
.skip_cleanup
= 1
5168 elif opt
== '--cvs-revnums':
5169 ctx
.svn_property_setters
.append(CVSRevisionNumberSetter())
5170 elif opt
== '--bdb-txn-nosync':
5171 ctx
.bdb_txn_nosync
= 1
5172 elif opt
== '--fs-type':
5174 elif opt
== '--create':
5175 sys
.stderr
.write(warning_prefix
+
5176 ': The behaviour produced by the --create option is now the '
5177 'default,\nand passing the option is deprecated.\n')
5178 elif opt
== '--profile':
5180 elif opt
== '--symbol-transform':
5181 [pattern
, replacement
] = value
.split(":")
5183 pattern
= re
.compile(pattern
)
5185 raise FatalError("'%s' is not a valid regexp." % (pattern
,))
5186 ctx
.symbol_transforms
.append((pattern
, replacement
,))
5192 # Consistency check for options and arguments.
5198 sys
.stderr
.write(error_prefix
+
5199 ": must pass only one CVS repository.\n")
5206 ctx
.cvs_repository
= CVSRepositoryViaCVS(cvsroot
)
5208 ctx
.cvs_repository
= CVSRepositoryViaRCS(cvsroot
)
5210 if (not ctx
.target
) and (not ctx
.dump_only
) and (not ctx
.dry_run
):
5211 raise FatalError("must pass one of '-s' or '--dump-only'.")
def not_both(opt1val, opt1name, opt2val, opt2name):
  """Raise FatalError if two mutually exclusive options were both given.

  OPT1VAL/OPT2VAL are the option values as parsed; OPT1NAME/OPT2NAME
  are the option names quoted in the error message.
  """
  # Early return when at most one of the two options was supplied.
  if not (opt1val and opt2val):
    return
  raise FatalError("cannot pass both '%s' and '%s'."
                   % (opt1name, opt2name,))
5218 not_both(ctx
.target
, '-s',
5219 ctx
.dump_only
, '--dump-only')
5221 not_both(ctx
.dump_only
, '--dump-only',
5222 ctx
.existing_svnrepos
, '--existing-svnrepos')
5224 not_both(ctx
.bdb_txn_nosync
, '--bdb-txn-nosync',
5225 ctx
.existing_svnrepos
, '--existing-svnrepos')
5227 not_both(ctx
.dump_only
, '--dump-only',
5228 ctx
.bdb_txn_nosync
, '--bdb-txn-nosync')
5230 not_both(ctx
.quiet
, '-q',
5233 not_both(ctx
.fs_type
, '--fs-type',
5234 ctx
.existing_svnrepos
, '--existing-svnrepos')
5236 if ctx
.fs_type
and ctx
.fs_type
!= 'bdb' and ctx
.bdb_txn_nosync
:
5237 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
5240 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
5241 ctx
.project
= Project(ctx
.cvs_repository
.cvs_repos_path
,
5242 ctx
.trunk_base
, ctx
.branches_base
, ctx
.tags_base
)
5244 if ctx
.existing_svnrepos
and not os
.path
.isdir(ctx
.target
):
5245 raise FatalError("the svn-repos-path '%s' is not an "
5246 "existing directory." % ctx
.target
)
5248 if not ctx
.dump_only
and not ctx
.existing_svnrepos \
5249 and (not ctx
.dry_run
) and os
.path
.exists(ctx
.target
):
5250 raise FatalError("the svn-repos-path '%s' exists.\n"
5251 "Remove it, or pass '--existing-svnrepos'."
5254 if ctx
.target
and not ctx
.dry_run
:
5255 # Verify that svnadmin can be executed. The 'help' subcommand
5256 # should be harmless.
5258 check_command_runs([ctx
.svnadmin
, 'help'], 'svnadmin')
5259 except CommandFailedException
, e
:
5262 'svnadmin could not be executed. Please ensure that it is\n'
5263 'installed and/or use the --svnadmin option.' % (e
,))
5265 if ctx
.mime_types_file
:
5266 ctx
.svn_property_setters
.append(MimeMapper(ctx
.mime_types_file
))
5268 if ctx
.auto_props_file
:
5269 ctx
.svn_property_setters
.append(AutoPropsPropertySetter(
5270 ctx
.auto_props_file
, ctx
.auto_props_ignore_case
))
5272 ctx
.svn_property_setters
.append(BinaryFileDefaultMimeTypeSetter())
5273 ctx
.svn_property_setters
.append(BinaryFileEOLStyleSetter())
5275 if ctx
.eol_from_mime_type
:
5276 ctx
.svn_property_setters
.append(EOLStyleFromMimeTypeSetter())
5278 if ctx
.no_default_eol
:
5279 ctx
.svn_property_setters
.append(DefaultEOLStyleSetter(None))
5281 ctx
.svn_property_setters
.append(DefaultEOLStyleSetter('native'))
5283 if not ctx
.keywords_off
:
5284 ctx
.svn_property_setters
.append(
5285 KeywordsPropertySetter(SVN_KEYWORDS_VALUE
))
5287 ctx
.svn_property_setters
.append(ExecutablePropertySetter())
5289 # Make sure the tmp directory exists. Note that we don't check if
5290 # it's empty -- we want to be able to use, for example, "." to hold
5291 # tempfiles. But if we *did* want check if it were empty, we'd do
5292 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
5293 if not os
.path
.exists(ctx
.tmpdir
):
5294 os
.mkdir(ctx
.tmpdir
)
5295 elif not os
.path
.isdir(ctx
.tmpdir
):
5297 "cvs2svn tried to use '%s' for temporary files, but that path\n"
5298 " exists and is not a directory. Please make it be a directory,\n"
5299 " or specify some other directory for temporary files."
5302 # But do lock the tmpdir, to avoid process clash.
5304 os
.mkdir(os
.path
.join(ctx
.tmpdir
, 'cvs2svn.lock'))
5306 if e
.errno
== errno
.EACCES
:
5307 raise FatalError("Permission denied:"
5308 + " No write access to directory '%s'." % ctx
.tmpdir
)
5309 if e
.errno
== errno
.EEXIST
:
5311 "cvs2svn is using directory '%s' for temporary files, but\n"
5312 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
5313 " cvs2svn process is currently using '%s' as its temporary\n"
5314 " workspace. If you are certain that is not the case,\n"
5315 " then remove the '%s/cvs2svn.lock' subdirectory."
5316 % (ctx
.tmpdir
, ctx
.tmpdir
, ctx
.tmpdir
, ctx
.tmpdir
,))
5321 prof
= hotshot
.Profile('cvs2svn.hotshot')
5322 prof
.runcall(convert
, start_pass
, end_pass
)
5325 convert(start_pass
, end_pass
)
5327 try: os
.rmdir(os
.path
.join(ctx
.tmpdir
, 'cvs2svn.lock'))
5331 if __name__
== '__main__':
5334 except FatalException
, e
:
5335 sys
.stderr
.write(str(e
))