Encapsulate pass_manager.convert() in new PassManager class.
[cvs2svn.git] / cvs2svn
blob389800b16ea14d795bc45788d901ba05674dcf33
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 from __future__ import generators
24 import os
25 import sys
26 import sha
27 import re
28 import time
29 import fileinput
30 import getopt
31 import marshal
32 import errno
33 import types
35 try:
36 # Try to get access to a bunch of encodings for use with --encoding.
37 # See http://cjkpython.i18n.org/ for details.
38 import iconv_codec
39 except ImportError:
40 pass
42 import cvs2svn_rcsparse
44 from cvs2svn_lib.boolean import *
46 from cvs2svn_lib import config
48 from cvs2svn_lib import common
49 from cvs2svn_lib.common import \
50 warning_prefix, \
51 error_prefix, \
52 FatalException, \
53 FatalError
55 from cvs2svn_lib.log import Log
57 from cvs2svn_lib.process import \
58 run_command, \
59 CommandFailedException, \
60 check_command_runs
62 from cvs2svn_lib import database
63 from cvs2svn_lib.context import Ctx
64 from cvs2svn_lib.artifact_manager import artifact_manager
65 from cvs2svn_lib.stats_keeper import StatsKeeper
66 from cvs2svn_lib import key_generator
67 from cvs2svn_lib import cvs_revision
68 from cvs2svn_lib import cvs_repository
69 from cvs2svn_lib import property_setters
70 from cvs2svn_lib.svn_revision_range import SVNRevisionRange
71 from cvs2svn_lib.tags_database import TagsDatabase
72 from cvs2svn_lib.cvs_revision_database import CVSRevisionDatabase
73 from cvs2svn_lib.openings_closings import \
74 OpeningsClosingsMap, \
75 SymbolingsLogger
76 from cvs2svn_lib.fill_source import FillSource
77 from cvs2svn_lib.last_symbolic_name_database import LastSymbolicNameDatabase
78 from cvs2svn_lib.symbol_database import SymbolDatabase
79 from cvs2svn_lib.project import Project
80 from cvs2svn_lib import collect_data
81 from cvs2svn_lib.symbolings_reader import SymbolingsReader
82 from cvs2svn_lib.svn_commit_item import SVNCommitItem
83 from cvs2svn_lib.svn_commit import SVNCommit
84 from cvs2svn_lib.svn_repository_mirror import \
85 SVNRepositoryMirror, \
86 SVNRepositoryMirrorDelegate
87 from cvs2svn_lib.dumpfile_delegate import DumpfileDelegate
88 from cvs2svn_lib.repository_delegate import RepositoryDelegate
89 from cvs2svn_lib.stdout_delegate import StdoutDelegate
90 from cvs2svn_lib.persistence_manager import PersistenceManager
91 from cvs2svn_lib.pass_manager import PassManager
94 # Make sure this Python is recent enough.
95 if sys.hexversion < 0x02020000:
96 sys.stderr.write("'%s: Python 2.2 or higher required, "
97 "see www.python.org.\n" % error_prefix)
98 sys.exit(1)
101 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
104 ctrl_characters_regexp = re.compile('[\\\x00-\\\x1f\\\x7f]')
106 def verify_filename_legal(filename):
107 """Verify that FILENAME does not include any control characters. If
108 it does, raise a FatalError."""
110 m = ctrl_characters_regexp.search(filename)
111 if m:
112 raise FatalError(
113 "Character %r in filename %r is not supported by subversion."
114 % (m.group(), filename,))
117 def sort_file(infilename, outfilename):
118 """Sort file INFILENAME, storing the results to OUTFILENAME."""
120 # GNU sort will sort our dates differently (incorrectly!) if our
121 # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
122 # it to 'C'
123 lc_all_tmp = os.environ.get('LC_ALL', None)
124 os.environ['LC_ALL'] = 'C'
125 try:
126 # The -T option to sort has a nice side effect. The Win32 sort is
127 # case insensitive and cannot be used, and since it does not
128 # understand the -T option and dies if we try to use it, there is
129 # no risk that we use that sort by accident.
130 run_command('sort -T %s %s > %s'
131 % (Ctx().tmpdir, infilename, outfilename))
132 finally:
133 if lc_all_tmp is None:
134 del os.environ['LC_ALL']
135 else:
136 os.environ['LC_ALL'] = lc_all_tmp
139 class CVSCommit:
140 """Each instance of this class contains a number of CVS Revisions
141 that correspond to one or more Subversion Commits. After all CVS
142 Revisions are added to the grouping, calling process_revisions will
143 generate a Subversion Commit (or Commits) for the set of CVS
144 Revisions in the grouping."""
146 def __init__(self, digest, author, log):
147 self.digest = digest
148 self.author = author
149 self.log = log
151 # Symbolic names for which the last source revision has already
152 # been seen and for which the CVSRevisionAggregator has already
153 # generated a fill SVNCommit. See self.process_revisions().
154 self.done_symbols = [ ]
156 self.files = { }
157 # Lists of CVSRevisions
158 self.changes = [ ]
159 self.deletes = [ ]
161 # Start out with a t_min higher than any incoming time T, and a
162 # t_max lower than any incoming T. This way the first T will
163 # push t_min down to T, and t_max up to T, naturally (without any
164 # special-casing), and successive times will then ratchet them
165 # outward as appropriate.
166 self.t_min = 1L<<32
167 self.t_max = 0
169 # This will be set to the SVNCommit that occurs in self._commit.
170 self.motivating_commit = None
172 # This is a list of all non-primary commits motivated by the main
173 # commit. We gather these so that we can set their dates to the
174 # same date as the primary commit.
175 self.secondary_commits = [ ]
177 # State for handling default branches.
179 # Here is a tempting, but ultimately nugatory, bit of logic, which
180 # I share with you so you may appreciate the less attractive, but
181 # refreshingly non-nugatory, logic which follows it:
183 # If some of the commits in this txn happened on a non-trunk
184 # default branch, then those files will have to be copied into
185 # trunk manually after being changed on the branch (because the
186 # RCS "default branch" appears as head, i.e., trunk, in practice).
187 # As long as those copies don't overwrite any trunk paths that
188 # were also changed in this commit, then we can do the copies in
189 # the same revision, because they won't cover changes that don't
190 # appear anywhere/anywhen else. However, if some of the trunk dst
191 # paths *did* change in this commit, then immediately copying the
192 # branch changes would lose those trunk mods forever. So in this
193 # case, we need to do at least that copy in its own revision. And
194 # for simplicity's sake, if we're creating the new revision for
195 # even one file, then we just do all such copies together in the
196 # new revision.
198 # Doesn't that sound nice?
200 # Unfortunately, Subversion doesn't support copies with sources
201 # in the current txn. All copies must be based in committed
202 # revisions. Therefore, we generate the above-described new
203 # revision unconditionally.
205 # This is a list of c_revs, and a c_rev is appended for each
206 # default branch commit that will need to be copied to trunk (or
207 # deleted from trunk) in some generated revision following the
208 # "regular" revision.
209 self.default_branch_cvs_revisions = [ ]
211 def __cmp__(self, other):
212 # Commits should be sorted by t_max. If both self and other have
213 # the same t_max, break the tie using t_min, and lastly, digest.
214 # If all those are equal, then compare based on ids, to ensure
215 # that no two instances compare equal.
216 return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
217 or cmp(self.digest, other.digest) or cmp(id(self), id(other)))
219 def __hash__(self):
220 return id(self)
222 def has_file(self, fname):
223 return self.files.has_key(fname)
225 def revisions(self):
226 return self.changes + self.deletes
228 def opens_symbolic_name(self, name):
229 """Return True if any CVSRevision in this commit is on a tag or a
230 branch or is the origin of a tag or branch."""
232 for c_rev in self.revisions():
233 if c_rev.opens_symbolic_name(name):
234 return True
235 return False
237 def add_revision(self, c_rev):
238 # Record the time range of this commit.
240 # ### ISSUE: It's possible, though unlikely, that the time range
241 # of a commit could get gradually expanded to be arbitrarily
242 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
243 # problem, and anyway deciding where to break it up would be a
244 # judgement call. For now, we just print a warning in commit() if
245 # this happens.
246 if c_rev.timestamp < self.t_min:
247 self.t_min = c_rev.timestamp
248 if c_rev.timestamp > self.t_max:
249 self.t_max = c_rev.timestamp
251 if c_rev.op == common.OP_DELETE:
252 self.deletes.append(c_rev)
253 else:
254 # OP_CHANGE or OP_ADD
255 self.changes.append(c_rev)
257 self.files[c_rev.fname] = 1
259 def _pre_commit(self):
260 """Generates any SVNCommits that must exist before the main commit."""
262 # There may be multiple c_revs in this commit that would cause
263 # branch B to be filled, but we only want to fill B once. On the
264 # other hand, there might be multiple branches committed on in
265 # this commit. Whatever the case, we should count exactly one
266 # commit per branch, because we only fill a branch once per
267 # CVSCommit. This list tracks which branches we've already
268 # counted.
269 accounted_for_sym_names = [ ]
271 def fill_needed(c_rev, pm):
272 """Return 1 if this is the first commit on a new branch (for
273 this file) and we need to fill the branch; else return 0
274 (meaning that some other file's first commit on the branch has
275 already done the fill for us).
277 If C_REV.op is OP_ADD, only return 1 if the branch that this
278 commit is on has no last filled revision.
280 PM is a PersistenceManager to query."""
282 # Different '.' counts indicate that c_rev is now on a different
283 # line of development (and may need a fill)
284 if c_rev.rev.count('.') != c_rev.prev_rev.rev.count('.'):
285 svn_revnum = pm.get_svn_revnum(c_rev.prev_rev.unique_key())
286 # It should be the case that when we have a file F that
287 # is added on branch B (thus, F on trunk is in state
288 # 'dead'), we generate an SVNCommit to fill B iff the branch
289 # has never been filled before.
291 # If this c_rev.op == OP_ADD, *and* the branch has never
292 # been filled before, then fill it now. Otherwise, no need to
293 # fill it.
294 if c_rev.op == common.OP_ADD:
295 if pm.last_filled.get(c_rev.branch_name, None) is None:
296 return 1
297 elif c_rev.op == common.OP_CHANGE:
298 if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
299 return 1
300 elif c_rev.op == common.OP_DELETE:
301 if pm.last_filled.get(c_rev.branch_name, None) is None:
302 return 1
303 return 0
305 for c_rev in self.changes + self.deletes:
306 # If a commit is on a branch, we must ensure that the branch
307 # path being committed exists (in HEAD of the Subversion
308 # repository). If it doesn't exist, we will need to fill the
309 # branch. After the fill, the path on which we're committing
310 # will exist.
311 if c_rev.branch_name \
312 and c_rev.branch_name not in accounted_for_sym_names \
313 and c_rev.branch_name not in self.done_symbols \
314 and fill_needed(c_rev, Ctx()._persistence_manager):
315 svn_commit = SVNCommit("pre-commit symbolic name '%s'"
316 % c_rev.branch_name)
317 svn_commit.set_symbolic_name(c_rev.branch_name)
318 self.secondary_commits.append(svn_commit)
319 accounted_for_sym_names.append(c_rev.branch_name)
321 def _commit(self):
322 """Generates the primary SVNCommit that corresponds to this
323 CVSCommit."""
325 # Generate an SVNCommit unconditionally. Even if the only change
326 # in this CVSCommit is a deletion of an already-deleted file (that
327 # is, a CVS revision in state 'dead' whose predecessor was also in
328 # state 'dead'), the conversion will still generate a Subversion
329 # revision containing the log message for the second dead
330 # revision, because we don't want to lose that information.
331 svn_commit = SVNCommit("commit")
332 self.motivating_commit = svn_commit
334 for c_rev in self.changes:
335 svn_commit.add_revision(c_rev)
336 # Only make a change if we need to:
337 if c_rev.rev == "1.1.1.1" and not c_rev.deltatext_exists:
338 # When 1.1.1.1 has an empty deltatext, the explanation is
339 # almost always that we're looking at an imported file whose
340 # 1.1 and 1.1.1.1 are identical. On such imports, CVS creates
341 # an RCS file where 1.1 has the content, and 1.1.1.1 has an
342 # empty deltatext, i.e, the same content as 1.1. There's no
343 # reason to reflect this non-change in the repository, so we
344 # want to do nothing in this case. (If we were really
345 # paranoid, we could make sure 1.1's log message is the
346 # CVS-generated "Initial revision\n", but I think the
347 # conditions above are strict enough.)
348 pass
349 else:
350 if c_rev.is_default_branch_revision():
351 self.default_branch_cvs_revisions.append(c_rev)
353 for c_rev in self.deletes:
354 # When a file is added on a branch, CVS not only adds the file
355 # on the branch, but generates a trunk revision (typically
356 # 1.1) for that file in state 'dead'. We only want to add
357 # this revision if the log message is not the standard cvs
358 # fabricated log message.
359 if c_rev.prev_rev is None:
360 # c_rev.branches may be empty if the originating branch
361 # has been excluded.
362 if not c_rev.branches:
363 continue
364 cvs_generated_msg = ('file %s was initially added on branch %s.\n'
365 % (c_rev.filename(),
366 c_rev.branches[0]))
367 author, log_msg = \
368 Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
369 if log_msg == cvs_generated_msg:
370 continue
372 svn_commit.add_revision(c_rev)
373 if c_rev.is_default_branch_revision():
374 self.default_branch_cvs_revisions.append(c_rev)
376 # There is a slight chance that we didn't actually register any
377 # CVSRevisions with our SVNCommit (see loop over self.deletes
378 # above), so if we have no CVSRevisions, we don't flush the
379 # svn_commit to disk and roll back our revnum.
380 if len(svn_commit.cvs_revs) > 0:
381 svn_commit.set_date(self.t_max)
382 svn_commit.flush()
383 else:
384 # We will not be flushing this SVNCommit, so rollback the
385 # SVNCommit revision counter.
386 SVNCommit.revnum -= 1
388 if not Ctx().trunk_only:
389 for c_rev in self.revisions():
390 Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
392 def _post_commit(self):
393 """Generates any SVNCommits that we can perform now that _commit
394 has happened. That is, handle non-trunk default branches.
395 Sometimes an RCS file has a non-trunk default branch, so a commit
396 on that default branch would be visible in a default CVS checkout
397 of HEAD. If we don't copy that commit over to Subversion's trunk,
398 then there will be no Subversion tree which corresponds to that
399 CVS checkout. Of course, in order to copy the path over, we may
400 first need to delete the existing trunk there."""
402 # Only generate a commit if we have default branch revs
403 if len(self.default_branch_cvs_revisions):
404 # Generate an SVNCommit for all of our default branch c_revs.
405 svn_commit = SVNCommit("post-commit default branch(es)")
406 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
407 for c_rev in self.default_branch_cvs_revisions:
408 svn_commit.add_revision(c_rev)
409 Ctx()._symbolings_logger.log_default_branch_closing(
410 c_rev, svn_commit.revnum)
411 self.secondary_commits.append(svn_commit)
413 def process_revisions(self, done_symbols):
414 """Process all the CVSRevisions that this instance has, creating
415 one or more SVNCommits in the process. Generate fill SVNCommits
416 only for symbols not in DONE_SYMBOLS (avoids unnecessary
417 fills).
419 Return the primary SVNCommit that corresponds to this CVSCommit.
420 The returned SVNCommit is the commit that motivated any other
421 SVNCommits generated in this CVSCommit."""
423 self.done_symbols = done_symbols
424 seconds = self.t_max - self.t_min + 1
426 Log().write(Log.VERBOSE, '-' * 60)
427 Log().write(Log.VERBOSE, 'CVS Revision grouping:')
428 if seconds == 1:
429 Log().write(Log.VERBOSE, ' Start time: %s (duration: 1 second)'
430 % time.ctime(self.t_max))
431 else:
432 Log().write(Log.VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
433 Log().write(Log.VERBOSE, ' End time: %s (duration: %d seconds)'
434 % (time.ctime(self.t_max), seconds))
436 if seconds > config.COMMIT_THRESHOLD + 1:
437 Log().write(Log.WARN, '%s: grouping spans more than %d seconds'
438 % (warning_prefix, config.COMMIT_THRESHOLD))
440 if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
441 self._commit()
442 return self.motivating_commit
444 self._pre_commit()
445 self._commit()
446 self._post_commit()
448 for svn_commit in self.secondary_commits:
449 svn_commit.set_date(self.motivating_commit.get_date())
450 svn_commit.flush()
452 return self.motivating_commit
455 class CVSRevisionAggregator:
456 """This class groups CVSRevisions into CVSCommits that represent
457 at least one SVNCommit."""
459 def __init__(self):
460 self.metadata_db = database.Database(
461 artifact_manager.get_temp_file(config.METADATA_DB),
462 database.DB_OPEN_READ)
463 if not Ctx().trunk_only:
464 self.last_revs_db = database.Database(
465 artifact_manager.get_temp_file(config.SYMBOL_LAST_CVS_REVS_DB),
466 database.DB_OPEN_READ)
468 # A map { key : CVSCommit } of CVS commits currently being
469 # accumulated. If the CVSCommit is still open to further
470 # CVSRevisions, then key is CVSRevision.digest. If not (because
471 # an inbound commit wanted to affect a file that was already
472 # within the CVSCommit), then key is CVSRevision.digest plus some
473 # number of appended '-'.
474 self.cvs_commits = {}
476 # List of ready commits.
477 self.ready_queue = [ ]
479 # A map { symbol : None } of symbolic names for which the last
480 # source CVSRevision has already been processed but which haven't
481 # been closed yet.
482 self.pending_symbols = {}
484 # A list of closed symbols. That is, we've already encountered
485 # the last CVSRevision that is a source for that symbol, the final
486 # fill for this symbol has been done, and we never need to fill it
487 # again.
488 self.done_symbols = [ ]
490 # This variable holds the most recently created primary svn_commit
491 # object. CVSRevisionAggregator maintains this variable merely
492 # for its date, so that it can set dates for the SVNCommits
493 # created in self._attempt_to_commit_symbols().
494 self.latest_primary_svn_commit = None
496 Ctx()._symbolings_logger = SymbolingsLogger()
497 Ctx()._persistence_manager = PersistenceManager(database.DB_OPEN_NEW)
498 Ctx()._default_branches_db = database.SDatabase(
499 artifact_manager.get_temp_file(config.DEFAULT_BRANCHES_DB),
500 database.DB_OPEN_READ)
502 def _extract_ready_commits(self, timestamp=None):
503 """Extract any active commits that expire by TIMESTAMP from
504 self.cvs_commits and append them to self.ready_queue. If
505 TIMESTAMP is not specified, then extract all commits."""
507 for digest_key, cvs_commit in self.cvs_commits.items():
508 if timestamp is None \
509 or cvs_commit.t_max + config.COMMIT_THRESHOLD < timestamp:
510 self.ready_queue.append(cvs_commit)
511 del self.cvs_commits[digest_key]
513 def _commit_ready_commits(self, timestamp=None):
514 """Sort the commits from self.ready_queue by time, then process
515 them in order. If TIMESTAMP is specified, only process commits
516 that have timestamp previous to TIMESTAMP."""
518 self.ready_queue.sort()
519 while self.ready_queue and \
520 (timestamp is None or self.ready_queue[0].t_max < timestamp):
521 cvs_commit = self.ready_queue.pop(0)
522 self.latest_primary_svn_commit = \
523 cvs_commit.process_revisions(self.done_symbols)
524 self._attempt_to_commit_symbols()
526 def process_revision(self, c_rev):
527 # Each time we read a new line, scan the accumulating commits to
528 # see if any are ready for processing.
529 self._extract_ready_commits(c_rev.timestamp)
531 for digest_key, cvs_commit in self.cvs_commits.items():
532 # If the inbound commit is on the same file as a pending commit,
533 # close the pending commit to further changes. Don't flush it though,
534 # as there may be other pending commits dated before this one.
535 # ### ISSUE: the has_file() check below is not optimal.
536 # It does fix the dataloss bug where revisions would get lost
537 # if checked in too quickly, but it can also break apart the
538 # commits. The correct fix would require tracking the dependencies
539 # between change sets and committing them in proper order.
540 if cvs_commit.has_file(c_rev.fname):
541 unused_id = digest_key + '-'
542 # Find a string that does is not already a key in
543 # the self.cvs_commits dict
544 while self.cvs_commits.has_key(unused_id):
545 unused_id += '-'
546 self.cvs_commits[unused_id] = cvs_commit
547 del self.cvs_commits[digest_key]
549 # Add this item into the set of still-available commits.
550 if self.cvs_commits.has_key(c_rev.digest):
551 cvs_commit = self.cvs_commits[c_rev.digest]
552 else:
553 author, log = self.metadata_db[c_rev.digest]
554 cvs_commit = CVSCommit(c_rev.digest, author, log)
555 self.cvs_commits[c_rev.digest] = cvs_commit
556 cvs_commit.add_revision(c_rev)
558 # Any elements in self.ready_queue at this point need to be
559 # processed, because this latest rev couldn't possibly be part of
560 # any of them.
561 self._commit_ready_commits()
563 self._add_pending_symbols(c_rev)
565 def flush(self):
566 """Commit anything left in self.cvs_commits. Then inform the
567 SymbolingsLogger that all commits are done."""
569 self._extract_ready_commits()
570 self._commit_ready_commits()
572 if not Ctx().trunk_only:
573 Ctx()._symbolings_logger.close()
575 def _add_pending_symbols(self, c_rev):
576 """Add to self.pending_symbols any symbols from C_REV for which
577 C_REV is the last CVSRevision.
579 If we're not doing a trunk-only conversion, get the symbolic names
580 that this c_rev is the last *source* CVSRevision for and add them
581 to those left over from previous passes through the aggregator."""
583 if not Ctx().trunk_only:
584 for sym in self.last_revs_db.get(c_rev.unique_key(), []):
585 self.pending_symbols[sym] = None
587 def _attempt_to_commit_symbols(self):
588 """Generate one SVNCommit for each symbol in self.pending_symbols
589 that doesn't have an opening CVSRevision in either self.ready_queue
590 or self.cvs_commits.values()."""
592 # Make a list of all symbols from self.pending_symbols that do not
593 # have *source* CVSRevisions in the pending commit queues
594 # (self.cvs_commits or self.ready_queue):
595 closeable_symbols = []
596 pending_commits = self.cvs_commits.values() + self.ready_queue
597 for sym in self.pending_symbols:
598 for cvs_commit in pending_commits:
599 if cvs_commit.opens_symbolic_name(sym):
600 break
601 else:
602 closeable_symbols.append(sym)
604 # Sort the closeable symbols so that we will always process the
605 # symbols in the same order, regardless of the order in which the
606 # dict hashing algorithm hands them back to us. We do this so
607 # that our tests will get the same results on all platforms.
608 closeable_symbols.sort()
609 for sym in closeable_symbols:
610 svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
611 svn_commit.set_symbolic_name(sym)
612 svn_commit.set_date(self.latest_primary_svn_commit.get_date())
613 svn_commit.flush()
614 self.done_symbols.append(sym)
615 del self.pending_symbols[sym]
618 class Pass:
619 """Base class for one step of the conversion."""
621 def __init__(self):
622 # By default, use the pass object's class name as the pass name:
623 self.name = self.__class__.__name__
625 def register_artifacts(self):
626 """Register artifacts (created and needed) in artifact_manager."""
628 raise NotImplementedError
630 def _register_temp_file(self, basename):
631 """Helper method; for brevity only."""
633 artifact_manager.register_temp_file(basename, self)
635 def _register_temp_file_needed(self, basename):
636 """Helper method; for brevity only."""
638 artifact_manager.register_temp_file_needed(basename, self)
640 def run(self):
641 """Carry out this step of the conversion."""
643 raise NotImplementedError
646 class CollectRevsPass(Pass):
647 """This pass was formerly known as pass1."""
649 def register_artifacts(self):
650 self._register_temp_file(config.TAGS_LIST)
651 self._register_temp_file(config.BRANCHES_LIST)
652 self._register_temp_file(config.REVS_DATAFILE)
653 self._register_temp_file(config.RESYNC_DATAFILE)
654 self._register_temp_file(config.DEFAULT_BRANCHES_DB)
655 self._register_temp_file(config.METADATA_DB)
657 def run(self):
658 OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
659 Log().write(Log.QUIET, "Examining all CVS ',v' files...")
660 cd = collect_data.CollectData()
662 def visit_file(baton, dirname, files):
663 cd = baton
664 for fname in files:
665 verify_filename_legal(fname)
666 if not fname.endswith(',v'):
667 continue
668 cd.found_valid_file = 1
669 pathname = os.path.join(dirname, fname)
670 if dirname.endswith(OS_SEP_PLUS_ATTIC):
671 # drop the 'Attic' portion from the pathname for the canonical name.
672 fdc = collect_data.FileDataCollector(
673 cd, os.path.join(dirname[:-6], fname), pathname)
674 else:
675 # If this file also exists in the attic, it's a fatal error
676 attic_path = os.path.join(dirname, 'Attic', fname)
677 if os.path.exists(attic_path):
678 err = "%s: A CVS repository cannot contain both %s and %s" \
679 % (error_prefix, pathname, attic_path)
680 sys.stderr.write(err + '\n')
681 cd.fatal_errors.append(err)
682 fdc = collect_data.FileDataCollector(cd, pathname, pathname)
683 Log().write(Log.NORMAL, pathname)
684 try:
685 cvs2svn_rcsparse.parse(open(pathname, 'rb'), fdc)
686 except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
687 RuntimeError):
688 err = "%s: '%s' is not a valid ,v file" \
689 % (error_prefix, pathname)
690 sys.stderr.write(err + '\n')
691 cd.fatal_errors.append(err)
692 except:
693 Log().write(Log.WARN,
694 "Exception occurred while parsing %s" % pathname)
695 raise
697 os.path.walk(Ctx().project.project_cvs_repos_path, visit_file, cd)
698 Log().write(Log.VERBOSE, 'Processed', cd.num_files, 'files')
700 cd.write_symbol_db()
702 if len(cd.fatal_errors) > 0:
703 raise FatalException("Pass 1 complete.\n"
704 + "=" * 75 + "\n"
705 + "Error summary:\n"
706 + "\n".join(cd.fatal_errors) + "\n"
707 + "Exited due to fatal error(s).\n")
709 if cd.found_valid_file is None:
710 raise FatalException(
711 "\n"
712 "No RCS files found in your CVS Repository!\n"
713 "Are you absolutely certain you are pointing cvs2svn\n"
714 "at a CVS repository?\n"
715 "\n"
716 "Exited due to fatal error(s).\n")
718 StatsKeeper().reset_c_rev_info()
719 StatsKeeper().archive()
720 Log().write(Log.QUIET, "Done")
723 class ResyncRevsPass(Pass):
724 """Clean up the revision information.
726 This pass was formerly known as pass2."""
728 def register_artifacts(self):
729 self._register_temp_file(config.TAGS_DB)
730 self._register_temp_file(config.CLEAN_REVS_DATAFILE)
731 self._register_temp_file(config.TWEAKED_TIMESTAMPS_DB)
732 self._register_temp_file_needed(config.TAGS_LIST)
733 self._register_temp_file_needed(config.BRANCHES_LIST)
734 self._register_temp_file_needed(config.REVS_DATAFILE)
735 self._register_temp_file_needed(config.RESYNC_DATAFILE)
737 def run(self):
738 symbol_db = SymbolDatabase()
739 symbol_db.read()
741 # Convert the list of regexps to a list of strings
742 excludes = symbol_db.find_excluded_symbols(Ctx().excludes)
744 error_detected = 0
746 Log().write(Log.QUIET, "Checking for blocked exclusions...")
747 blocked_excludes = symbol_db.find_blocked_excludes(excludes)
748 if blocked_excludes:
749 for branch, blockers in blocked_excludes.items():
750 sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
751 "excluded because the following symbols depend "
752 "on it:\n" % (branch))
753 for blocker in blockers:
754 sys.stderr.write(" '%s'\n" % (blocker))
755 sys.stderr.write("\n")
756 error_detected = 1
758 Log().write(Log.QUIET, "Checking for forced tags with commits...")
759 invalid_forced_tags = [ ]
760 for forced_tag in Ctx().forced_tags:
761 if excludes.has_key(forced_tag):
762 continue
763 if symbol_db.branch_has_commit(forced_tag):
764 invalid_forced_tags.append(forced_tag)
765 if invalid_forced_tags:
766 sys.stderr.write(error_prefix + ": The following branches cannot be "
767 "forced to be tags because they have commits:\n")
768 for tag in invalid_forced_tags:
769 sys.stderr.write(" '%s'\n" % (tag))
770 sys.stderr.write("\n")
771 error_detected = 1
773 Log().write(Log.QUIET, "Checking for tag/branch mismatches...")
774 mismatches = symbol_db.find_mismatches(excludes)
775 def is_not_forced(mismatch):
776 name = mismatch[0]
777 return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
778 mismatches = filter(is_not_forced, mismatches)
779 if mismatches:
780 sys.stderr.write(error_prefix + ": The following symbols are tags "
781 "in some files and branches in others.\nUse "
782 "--force-tag, --force-branch and/or --exclude to "
783 "resolve the symbols.\n")
784 for name, tag_count, branch_count, commit_count in mismatches:
785 sys.stderr.write(" '%s' is a tag in %d files, a branch in "
786 "%d files and has commits in %d files.\n"
787 % (name, tag_count, branch_count, commit_count))
788 error_detected = 1
790 # Bail out now if we found errors
791 if error_detected:
792 sys.exit(1)
794 # Create the tags database
795 tags_db = TagsDatabase(database.DB_OPEN_NEW)
796 for tag in symbol_db.tags:
797 if tag not in Ctx().forced_branches:
798 tags_db.add(tag)
799 for tag in Ctx().forced_tags:
800 tags_db.add(tag)
802 Log().write(Log.QUIET, "Re-synchronizing CVS revision timestamps...")
804 # We may have recorded some changes in revisions' timestamp. We need to
805 # scan for any other files which may have had the same log message and
806 # occurred at "the same time" and change their timestamps, too.
808 # read the resync data file
809 def read_resync(fname):
810 """Read the .resync file into memory."""
812 ### note that we assume that we can hold the entire resync file in
813 ### memory. really large repositories with whacky timestamps could
814 ### bust this assumption. should that ever happen, then it is possible
815 ### to split the resync file into pieces and make multiple passes,
816 ### using each piece.
819 # A digest maps to a sequence of lists which specify a lower and upper
820 # time bound for matching up the commit. We keep a sequence of these
821 # because a number of checkins with the same log message (e.g. an empty
822 # log message) could need to be remapped. We also make them a list
823 # because we will dynamically expand the lower/upper bound as we find
824 # commits that fall into a particular msg and time range.
826 # resync == digest -> [[old_time_lower, old_time_upper, new_time], ...]
828 resync = { }
830 for line in fileinput.FileInput(fname):
831 t1 = int(line[:8], 16)
832 digest = line[9:DIGEST_END_IDX]
833 t2 = int(line[DIGEST_END_IDX+1:], 16)
834 t1_l = t1 - config.COMMIT_THRESHOLD/2
835 t1_u = t1 + config.COMMIT_THRESHOLD/2
836 resync.setdefault(digest, []).append([t1_l, t1_u, t2])
838 # For each digest, sort the resync items in it in increasing order,
839 # based on the lower time bound.
840 for val in resync.values():
841 val.sort()
843 return resync
845 resync = read_resync(
846 artifact_manager.get_temp_file(config.RESYNC_DATAFILE))
848 output = open(artifact_manager.get_temp_file(config.CLEAN_REVS_DATAFILE),
849 'w')
851 tweaked_timestamps_db = database.Database(
852 artifact_manager.get_temp_file(config.TWEAKED_TIMESTAMPS_DB),
853 database.DB_OPEN_NEW)
855 # process the revisions file, looking for items to clean up
856 for line in fileinput.FileInput(
857 artifact_manager.get_temp_file(config.REVS_DATAFILE)):
858 c_rev = cvs_revision.parse_cvs_revision(Ctx(), line[:-1])
860 # Skip this entire revision if it's on an excluded branch
861 if excludes.has_key(c_rev.branch_name):
862 continue
864 new_prev_ts = None
865 if c_rev.prev_rev is not None:
866 new_prev_ts = tweaked_timestamps_db.get(
867 c_rev.prev_rev.unique_key(), None)
868 if new_prev_ts:
869 c_rev.prev_timestamp = new_prev_ts
871 new_next_ts = None
872 if c_rev.next_rev is not None:
873 new_next_ts = tweaked_timestamps_db.get(
874 c_rev.next_rev.unique_key(), None)
875 if new_next_ts:
876 c_rev.next_timestamp = new_next_ts
878 # Remove all references to excluded tags and branches
879 def not_excluded(symbol, excludes=excludes):
880 return not excludes.has_key(symbol)
881 c_rev.branches = filter(not_excluded, c_rev.branches)
882 c_rev.tags = filter(not_excluded, c_rev.tags)
884 # Convert all branches that are forced to be tags
885 for forced_tag in Ctx().forced_tags:
886 if forced_tag in c_rev.branches:
887 c_rev.branches.remove(forced_tag)
888 c_rev.tags.append(forced_tag)
890 # Convert all tags that are forced to be branches
891 for forced_branch in Ctx().forced_branches:
892 if forced_branch in c_rev.tags:
893 c_rev.tags.remove(forced_branch)
894 c_rev.branches.append(forced_branch)
896 # see if this is "near" any of the resync records we
897 # have recorded for this digest [of the log message].
898 for record in resync.get(c_rev.digest, []):
899 if record[2] == c_rev.timestamp:
900 # This means that either c_rev is the same revision that
901 # caused the resync record to exist, or c_rev is a different
902 # CVS revision that happens to have the same timestamp. In
903 # either case, we don't have to do anything, so we...
904 continue
906 if record[0] <= c_rev.timestamp <= record[1]:
907 # bingo! We probably want to remap the time on this c_rev,
908 # unless the remapping would be useless because the new time
909 # would fall outside the COMMIT_THRESHOLD window for this
910 # commit group.
911 new_timestamp = record[2]
912 # If the new timestamp is earlier than that of our previous revision
913 if new_timestamp < c_rev.prev_timestamp:
914 desc = ("%s: Attempt to set timestamp of revision %s on file %s"
915 + " to time %s, which is before previous the time of"
916 + " revision %s (%s):")
917 Log().write(Log.WARN, desc % (warning_prefix, c_rev.rev,
918 c_rev.cvs_path, new_timestamp,
919 c_rev.prev_rev.rev,
920 c_rev.prev_timestamp))
921 # If resyncing our rev to c_rev.prev_timestamp + 1 will place
922 # the timestamp of c_rev within COMMIT_THRESHOLD of the
923 # attempted resync time, then sync back to c_rev.prev_timestamp
924 # + 1...
925 if ((c_rev.prev_timestamp + 1) - new_timestamp) \
926 < config.COMMIT_THRESHOLD:
927 new_timestamp = c_rev.prev_timestamp + 1
928 Log().write(Log.WARN, "%s: Time set to %s" % (warning_prefix,
929 new_timestamp))
930 else:
931 Log().write(Log.WARN, "%s: Timestamp left untouched" %
932 warning_prefix)
933 continue
935 # If the new timestamp is later than that of our next revision
936 elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
937 desc = ("%s: Attempt to set timestamp of revision %s on file %s"
938 + " to time %s, which is after time of next"
939 + " revision %s (%s):")
940 Log().write(Log.WARN, desc % (warning_prefix, c_rev.rev,
941 c_rev.cvs_path, new_timestamp,
942 c_rev.next_rev.rev,
943 c_rev.next_timestamp))
944 # If resyncing our rev to c_rev.next_timestamp - 1 will place
945 # the timestamp of c_rev within COMMIT_THRESHOLD of the
946 # attempted resync time, then sync forward to c_rev.next_timestamp
947 # - 1...
948 if (new_timestamp - (c_rev.next_timestamp - 1)) \
949 < config.COMMIT_THRESHOLD:
950 new_timestamp = c_rev.next_timestamp - 1
951 Log().write(Log.WARN, "%s: Time set to %s" % (warning_prefix,
952 new_timestamp))
953 else:
954 Log().write(Log.WARN, "%s: Timestamp left untouched" %
955 warning_prefix)
956 continue
958 # Fix for Issue #71: Avoid resyncing two consecutive revisions
959 # to the same timestamp.
960 elif (new_timestamp == c_rev.prev_timestamp
961 or new_timestamp == c_rev.next_timestamp):
962 continue
964 # adjust the time range. we want the COMMIT_THRESHOLD from the
965 # bounds of the earlier/latest commit in this group.
966 record[0] = min(record[0],
967 c_rev.timestamp - config.COMMIT_THRESHOLD/2)
968 record[1] = max(record[1],
969 c_rev.timestamp + config.COMMIT_THRESHOLD/2)
971 msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
972 % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
973 new_timestamp - c_rev.timestamp)
974 Log().write(Log.VERBOSE, msg)
976 c_rev.timestamp = new_timestamp
977 tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp
979 # stop looking for hits
980 break
982 output.write(str(c_rev) + "\n")
983 Log().write(Log.QUIET, "Done")
986 class SortRevsPass(Pass):
987 """This pass was formerly known as pass3."""
989 def register_artifacts(self):
990 self._register_temp_file(config.SORTED_REVS_DATAFILE)
991 self._register_temp_file_needed(config.CLEAN_REVS_DATAFILE)
993 def run(self):
994 Log().write(Log.QUIET, "Sorting CVS revisions...")
995 sort_file(artifact_manager.get_temp_file(config.CLEAN_REVS_DATAFILE),
996 artifact_manager.get_temp_file(config.SORTED_REVS_DATAFILE))
997 Log().write(Log.QUIET, "Done")
1000 class CreateDatabasesPass(Pass):
1001 """This pass was formerly known as pass4."""
1003 def register_artifacts(self):
1004 if not Ctx().trunk_only:
1005 self._register_temp_file(config.SYMBOL_LAST_CVS_REVS_DB)
1006 self._register_temp_file(config.CVS_REVS_DB)
1007 self._register_temp_file_needed(config.SORTED_REVS_DATAFILE)
1009 def run(self):
1010 """Iterate through sorted revs, storing them in a database.
1011 If we're not doing a trunk-only conversion, generate the
1012 LastSymbolicNameDatabase, which contains the last CVSRevision
1013 that is a source for each tag or branch."""
1015 Log().write(Log.QUIET,
1016 "Copying CVS revision data from flat file to database...")
1017 cvs_revs_db = CVSRevisionDatabase(database.DB_OPEN_NEW)
1018 if not Ctx().trunk_only:
1019 Log().write(Log.QUIET,
1020 "Finding last CVS revisions for all symbolic names...")
1021 last_sym_name_db = LastSymbolicNameDatabase()
1022 else:
1023 # This is to avoid testing Ctx().trunk_only every time around the loop
1024 class DummyLSNDB:
1025 def noop(*args): pass
1026 log_revision = noop
1027 create_database = noop
1028 last_sym_name_db = DummyLSNDB()
1030 for line in fileinput.FileInput(
1031 artifact_manager.get_temp_file(config.SORTED_REVS_DATAFILE)):
1032 c_rev = cvs_revision.parse_cvs_revision(Ctx(), line[:-1])
1033 cvs_revs_db.log_revision(c_rev)
1034 last_sym_name_db.log_revision(c_rev)
1035 StatsKeeper().record_c_rev(c_rev)
1037 StatsKeeper().set_stats_reflect_exclude(True)
1039 last_sym_name_db.create_database()
1040 StatsKeeper().archive()
1041 Log().write(Log.QUIET, "Done")
1044 class AggregateRevsPass(Pass):
1045 """Generate the SVNCommit <-> CVSRevision mapping databases.
1046 CVSCommit._commit also calls SymbolingsLogger to register
1047 CVSRevisions that represent an opening or closing for a path on a
1048 branch or tag. See SymbolingsLogger for more details.
1050 This pass was formerly known as pass5."""
1052 def register_artifacts(self):
1053 self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
1054 self._register_temp_file(config.SYMBOL_CLOSINGS_TMP)
1055 self._register_temp_file(config.SVN_REVNUMS_TO_CVS_REVS)
1056 self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
1057 if not Ctx().trunk_only:
1058 self._register_temp_file_needed(config.SYMBOL_LAST_CVS_REVS_DB)
1059 self._register_temp_file_needed(config.CVS_REVS_DB)
1060 self._register_temp_file_needed(config.TAGS_DB)
1061 self._register_temp_file_needed(config.DEFAULT_BRANCHES_DB)
1062 self._register_temp_file_needed(config.METADATA_DB)
1063 self._register_temp_file_needed(config.SORTED_REVS_DATAFILE)
1065 def run(self):
1066 Log().write(Log.QUIET, "Mapping CVS revisions to Subversion commits...")
1068 aggregator = CVSRevisionAggregator()
1069 for line in fileinput.FileInput(
1070 artifact_manager.get_temp_file(config.SORTED_REVS_DATAFILE)):
1071 c_rev = cvs_revision.parse_cvs_revision(Ctx(), line[:-1])
1072 if not (Ctx().trunk_only and c_rev.branch_name is not None):
1073 aggregator.process_revision(c_rev)
1074 aggregator.flush()
1076 StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
1077 StatsKeeper().archive()
1078 Log().write(Log.QUIET, "Done")
1081 class SortSymbolsPass(Pass):
1082 """This pass was formerly known as pass6."""
1084 def register_artifacts(self):
1085 if not Ctx().trunk_only:
1086 self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
1087 self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)
1089 def run(self):
1090 Log().write(Log.QUIET, "Sorting symbolic name source revisions...")
1092 if not Ctx().trunk_only:
1093 sort_file(
1094 artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
1095 artifact_manager.get_temp_file(
1096 config.SYMBOL_OPENINGS_CLOSINGS_SORTED))
1097 Log().write(Log.QUIET, "Done")
1100 class IndexSymbolsPass(Pass):
1101 """This pass was formerly known as pass7."""
1103 def register_artifacts(self):
1104 if not Ctx().trunk_only:
1105 self._register_temp_file(config.SYMBOL_OFFSETS_DB)
1106 self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
1108 def run(self):
1109 Log().write(Log.QUIET, "Determining offsets for all symbolic names...")
1111 def generate_offsets_for_symbolings():
1112 """This function iterates through all the lines in
1113 SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
1114 SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
1115 where SYMBOLIC_NAME is first encountered. This will allow us to
1116 seek to the various offsets in the file and sequentially read only
1117 the openings and closings that we need."""
1119 ###PERF This is a fine example of a db that can be in-memory and
1120 #just flushed to disk when we're done. Later, it can just be sucked
1121 #back into memory.
1122 offsets_db = database.Database(
1123 artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB),
1124 database.DB_OPEN_NEW)
1126 file = open(
1127 artifact_manager.get_temp_file(
1128 config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
1129 'r')
1130 old_sym = ""
1131 while 1:
1132 fpos = file.tell()
1133 line = file.readline()
1134 if not line:
1135 break
1136 sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
1137 if sym != old_sym:
1138 Log().write(Log.VERBOSE, " ", sym)
1139 old_sym = sym
1140 offsets_db[sym] = fpos
1142 if not Ctx().trunk_only:
1143 generate_offsets_for_symbolings()
1144 Log().write(Log.QUIET, "Done.")
1147 class OutputPass(Pass):
1148 """This pass was formerly known as pass8."""
1150 def register_artifacts(self):
1151 self._register_temp_file(config.SVN_MIRROR_REVISIONS_DB)
1152 self._register_temp_file(config.SVN_MIRROR_NODES_DB)
1153 self._register_temp_file_needed(config.CVS_REVS_DB)
1154 self._register_temp_file_needed(config.TAGS_DB)
1155 self._register_temp_file_needed(config.METADATA_DB)
1156 self._register_temp_file_needed(config.SVN_REVNUMS_TO_CVS_REVS)
1157 self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
1158 if not Ctx().trunk_only:
1159 self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
1160 self._register_temp_file_needed(config.SYMBOL_OFFSETS_DB)
1162 def run(self):
1163 svncounter = 2 # Repository initialization is 1.
1164 repos = SVNRepositoryMirror()
1165 persistence_manager = PersistenceManager(database.DB_OPEN_READ)
1167 if Ctx().target:
1168 if not Ctx().dry_run:
1169 repos.add_delegate(RepositoryDelegate())
1170 Log().write(Log.QUIET, "Starting Subversion Repository.")
1171 else:
1172 if not Ctx().dry_run:
1173 repos.add_delegate(DumpfileDelegate())
1174 Log().write(Log.QUIET, "Starting Subversion Dumpfile.")
1176 repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))
1178 while 1:
1179 svn_commit = persistence_manager.get_svn_commit(svncounter)
1180 if not svn_commit:
1181 break
1182 repos.commit(svn_commit)
1183 svncounter += 1
1185 repos.finish()
1188 pass_manager = PassManager([
1189 CollectRevsPass(),
1190 ResyncRevsPass(),
1191 SortRevsPass(),
1192 CreateDatabasesPass(),
1193 AggregateRevsPass(),
1194 SortSymbolsPass(),
1195 IndexSymbolsPass(),
1196 OutputPass(),
1200 def help_passes():
1201 print 'PASSES:'
1202 for i in range(len(pass_manager.passes)):
1203 print '%5d : %s' % (i + 1, pass_manager.passes[i].name,)
1206 def normalize_ttb_path(opt, path):
1207 """Normalize a path to be used for --trunk, --tags, or --branches.
1209 1. Strip leading, trailing, and duplicated '/'.
1210 2. Verify that the path is not empty.
1212 Return the normalized path.
1214 If the path is invalid, write an error message and exit."""
1216 norm_path = common.path_join(*path.split('/'))
1217 if not norm_path:
1218 raise FatalError("cannot pass an empty path to %s." % (opt,))
1219 return norm_path
1222 def usage():
1223 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
1224 % os.path.basename(sys.argv[0])
1225 print ' --help, -h print this usage message and exit with success'
1226 print ' --help-passes list the available passes and their numbers'
1227 print ' --version print the version number'
1228 print ' -q quiet'
1229 print ' -v verbose'
1230 print ' -s PATH path for SVN repos'
1231 print ' -p START[:END] start at pass START, end at pass END of %d' \
1232 % len(pass_manager.passes)
1233 print ' If only START is given, run only pass START'
1234 print ' (implicitly enables --skip-cleanup)'
1235 print ' --existing-svnrepos load into existing SVN repository'
1236 print ' --dumpfile=PATH name of intermediate svn dumpfile'
1237 print ' --tmpdir=PATH directory to use for tmp data (default to cwd)'
1238 print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
1239 print ' --dry-run do not create a repository or a dumpfile;'
1240 print ' just print what would happen.'
1241 print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
1242 print ' (only use this if having problems with RCS)'
1243 print ' --svnadmin=PATH path to the svnadmin program'
1244 print ' --trunk-only convert only trunk commits, not tags nor branches'
1245 print ' --trunk=PATH path for trunk (default: %s)' \
1246 % Ctx().trunk_base
1247 print ' --branches=PATH path for branches (default: %s)' \
1248 % Ctx().branches_base
1249 print ' --tags=PATH path for tags (default: %s)' \
1250 % Ctx().tags_base
1251 print ' --no-prune don\'t prune empty directories'
1252 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
1253 print ' --encoding=ENC encoding of paths and log messages in CVS repos'
1254 print ' Multiple of these options may be passed, where they'
1255 print ' will be treated as an ordered list of encodings to'
1256 print ' attempt (with "ascii" as a hardcoded last resort)'
1257 print ' --force-branch=NAME force NAME to be a branch'
1258 print ' --force-tag=NAME force NAME to be a tag'
1259 print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
1260 print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
1261 print ' use Python regexp and reference syntax respectively'
1262 print ' --username=NAME username for cvs2svn-synthesized commits'
1263 print ' --skip-cleanup prevent the deletion of intermediate files'
1264 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
1265 print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
1266 print ' --cvs-revnums record CVS revision numbers as file properties'
1267 print ' --auto-props=FILE set file properties from the auto-props section'
1268 print ' of a file in svn config format'
1269 print ' --auto-props-ignore-case Ignore case when matching auto-props patterns'
1270 print ' --mime-types=FILE specify an apache-style mime.types file for'
1271 print ' setting svn:mime-type'
1272 print ' --eol-from-mime-type set svn:eol-style from mime type if known'
1273 print ' --no-default-eol don\'t set svn:eol-style to \'native\' for'
1274 print ' non-binary files with undetermined mime types'
1275 print ' --keywords-off don\'t set svn:keywords on any files (by default,'
1276 print ' cvs2svn sets svn:keywords on non-binary files to'
1277 print ' "%s")' % config.SVN_KEYWORDS_VALUE
1280 def main():
1281 # Convenience var, so we don't have to keep instantiating this Borg.
1282 ctx = Ctx()
1284 profiling = None
1285 start_pass = 1
1286 end_pass = len(pass_manager.passes)
1288 try:
1289 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
1290 [ "help", "help-passes", "create", "trunk=",
1291 "username=", "existing-svnrepos",
1292 "branches=", "tags=", "encoding=",
1293 "force-branch=", "force-tag=", "exclude=",
1294 "use-cvs", "mime-types=",
1295 "auto-props=", "auto-props-ignore-case",
1296 "eol-from-mime-type", "no-default-eol",
1297 "trunk-only", "no-prune", "dry-run",
1298 "dump-only", "dumpfile=", "tmpdir=",
1299 "svnadmin=", "skip-cleanup", "cvs-revnums",
1300 "bdb-txn-nosync", "fs-type=",
1301 "version", "profile",
1302 "keywords-off", "symbol-transform="])
1303 except getopt.GetoptError, e:
1304 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
1305 usage()
1306 sys.exit(1)
1308 for opt, value in opts:
1309 if opt == '--version':
1310 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
1311 sys.exit(0)
1312 elif opt == '-p':
1313 # Don't cleanup if we're doing incrementals.
1314 ctx.skip_cleanup = 1
1315 if value.find(':') > 0:
1316 start_pass, end_pass = map(int, value.split(':'))
1317 else:
1318 end_pass = start_pass = int(value)
1319 if start_pass > len(pass_manager.passes) or start_pass < 1:
1320 raise FatalError(
1321 'illegal value (%d) for starting pass. Must be 1 through %d.'
1322 % (int(start_pass), len(pass_manager.passes),))
1323 if end_pass < start_pass or end_pass > len(pass_manager.passes):
1324 raise FatalError(
1325 'illegal value (%d) for ending pass. Must be %d through %d.'
1326 % (int(end_pass), int(start_pass), len(pass_manager.passes),))
1327 elif (opt == '--help') or (opt == '-h'):
1328 ctx.print_help = 1
1329 elif opt == '--help-passes':
1330 help_passes()
1331 sys.exit(0)
1332 elif opt == '-v':
1333 Log().log_level = Log.VERBOSE
1334 ctx.verbose = 1
1335 elif opt == '-q':
1336 Log().log_level = Log.QUIET
1337 ctx.quiet = 1
1338 elif opt == '-s':
1339 ctx.target = value
1340 elif opt == '--existing-svnrepos':
1341 ctx.existing_svnrepos = 1
1342 elif opt == '--dumpfile':
1343 ctx.dumpfile = value
1344 elif opt == '--tmpdir':
1345 ctx.tmpdir = value
1346 elif opt == '--use-cvs':
1347 ctx.use_cvs = 1
1348 elif opt == '--svnadmin':
1349 ctx.svnadmin = value
1350 elif opt == '--trunk-only':
1351 ctx.trunk_only = 1
1352 elif opt == '--trunk':
1353 ctx.trunk_base = normalize_ttb_path(opt, value)
1354 elif opt == '--branches':
1355 ctx.branches_base = normalize_ttb_path(opt, value)
1356 elif opt == '--tags':
1357 ctx.tags_base = normalize_ttb_path(opt, value)
1358 elif opt == '--no-prune':
1359 ctx.prune = None
1360 elif opt == '--dump-only':
1361 ctx.dump_only = 1
1362 elif opt == '--dry-run':
1363 ctx.dry_run = 1
1364 elif opt == '--encoding':
1365 ctx.encoding.insert(-1, value)
1366 elif opt == '--force-branch':
1367 ctx.forced_branches.append(value)
1368 elif opt == '--force-tag':
1369 ctx.forced_tags.append(value)
1370 elif opt == '--exclude':
1371 try:
1372 ctx.excludes.append(re.compile('^' + value + '$'))
1373 except re.error, e:
1374 raise FatalError("'%s' is not a valid regexp." % (value,))
1375 elif opt == '--mime-types':
1376 ctx.mime_types_file = value
1377 elif opt == '--auto-props':
1378 ctx.auto_props_file = value
1379 elif opt == '--auto-props-ignore-case':
1380 ctx.auto_props_ignore_case = True
1381 elif opt == '--eol-from-mime-type':
1382 ctx.eol_from_mime_type = 1
1383 elif opt == '--no-default-eol':
1384 ctx.no_default_eol = 1
1385 elif opt == '--keywords-off':
1386 ctx.keywords_off = 1
1387 elif opt == '--username':
1388 ctx.username = value
1389 elif opt == '--skip-cleanup':
1390 ctx.skip_cleanup = 1
1391 elif opt == '--cvs-revnums':
1392 ctx.svn_property_setters.append(
1393 property_setters.CVSRevisionNumberSetter())
1394 elif opt == '--bdb-txn-nosync':
1395 ctx.bdb_txn_nosync = 1
1396 elif opt == '--fs-type':
1397 ctx.fs_type = value
1398 elif opt == '--create':
1399 sys.stderr.write(warning_prefix +
1400 ': The behaviour produced by the --create option is now the '
1401 'default,\nand passing the option is deprecated.\n')
1402 elif opt == '--profile':
1403 profiling = 1
1404 elif opt == '--symbol-transform':
1405 [pattern, replacement] = value.split(":")
1406 try:
1407 pattern = re.compile(pattern)
1408 except re.error, e:
1409 raise FatalError("'%s' is not a valid regexp." % (pattern,))
1410 ctx.symbol_transforms.append((pattern, replacement,))
1412 if ctx.print_help:
1413 usage()
1414 sys.exit(0)
1416 # Consistency check for options and arguments.
1417 if len(args) == 0:
1418 usage()
1419 sys.exit(1)
1421 if len(args) > 1:
1422 sys.stderr.write(error_prefix +
1423 ": must pass only one CVS repository.\n")
1424 usage()
1425 sys.exit(1)
1427 cvsroot = args[0]
1429 if ctx.use_cvs:
1430 ctx.cvs_repository = cvs_repository.CVSRepositoryViaCVS(cvsroot)
1431 else:
1432 ctx.cvs_repository = cvs_repository.CVSRepositoryViaRCS(cvsroot)
1434 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
1435 raise FatalError("must pass one of '-s' or '--dump-only'.")
1437 def not_both(opt1val, opt1name, opt2val, opt2name):
1438 if opt1val and opt2val:
1439 raise FatalError("cannot pass both '%s' and '%s'."
1440 % (opt1name, opt2name,))
1442 not_both(ctx.target, '-s',
1443 ctx.dump_only, '--dump-only')
1445 not_both(ctx.dump_only, '--dump-only',
1446 ctx.existing_svnrepos, '--existing-svnrepos')
1448 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
1449 ctx.existing_svnrepos, '--existing-svnrepos')
1451 not_both(ctx.dump_only, '--dump-only',
1452 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
1454 not_both(ctx.quiet, '-q',
1455 ctx.verbose, '-v')
1457 not_both(ctx.fs_type, '--fs-type',
1458 ctx.existing_svnrepos, '--existing-svnrepos')
1460 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
1461 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
1462 % ctx.fs_type)
1464 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
1465 ctx.project = Project(ctx.cvs_repository.cvs_repos_path,
1466 ctx.trunk_base, ctx.branches_base, ctx.tags_base)
1468 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
1469 raise FatalError("the svn-repos-path '%s' is not an "
1470 "existing directory." % ctx.target)
1472 if not ctx.dump_only and not ctx.existing_svnrepos \
1473 and (not ctx.dry_run) and os.path.exists(ctx.target):
1474 raise FatalError("the svn-repos-path '%s' exists.\n"
1475 "Remove it, or pass '--existing-svnrepos'."
1476 % ctx.target)
1478 if ctx.target and not ctx.dry_run:
1479 # Verify that svnadmin can be executed. The 'help' subcommand
1480 # should be harmless.
1481 try:
1482 check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
1483 except CommandFailedException, e:
1484 raise FatalError(
1485 '%s\n'
1486 'svnadmin could not be executed. Please ensure that it is\n'
1487 'installed and/or use the --svnadmin option.' % (e,))
1489 ctx.svn_property_setters.append(
1490 property_setters.ExecutablePropertySetter())
1492 ctx.svn_property_setters.append(
1493 property_setters.BinaryFileEOLStyleSetter())
1495 if ctx.mime_types_file:
1496 ctx.svn_property_setters.append(
1497 property_setters.MimeMapper(ctx.mime_types_file))
1499 if ctx.auto_props_file:
1500 ctx.svn_property_setters.append(
1501 property_setters.AutoPropsPropertySetter(
1502 ctx.auto_props_file, ctx.auto_props_ignore_case))
1504 ctx.svn_property_setters.append(
1505 property_setters.BinaryFileDefaultMimeTypeSetter())
1507 if ctx.eol_from_mime_type:
1508 ctx.svn_property_setters.append(
1509 property_setters.EOLStyleFromMimeTypeSetter())
1511 if ctx.no_default_eol:
1512 ctx.svn_property_setters.append(
1513 property_setters.DefaultEOLStyleSetter(None))
1514 else:
1515 ctx.svn_property_setters.append(
1516 property_setters.DefaultEOLStyleSetter('native'))
1518 if not ctx.keywords_off:
1519 ctx.svn_property_setters.append(
1520 property_setters.KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE))
1522 # Make sure the tmp directory exists. Note that we don't check if
1523 # it's empty -- we want to be able to use, for example, "." to hold
1524 # tempfiles. But if we *did* want check if it were empty, we'd do
1525 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
1526 if not os.path.exists(ctx.tmpdir):
1527 os.mkdir(ctx.tmpdir)
1528 elif not os.path.isdir(ctx.tmpdir):
1529 raise FatalError(
1530 "cvs2svn tried to use '%s' for temporary files, but that path\n"
1531 " exists and is not a directory. Please make it be a directory,\n"
1532 " or specify some other directory for temporary files."
1533 % (ctx.tmpdir,))
1535 # But do lock the tmpdir, to avoid process clash.
1536 try:
1537 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
1538 except OSError, e:
1539 if e.errno == errno.EACCES:
1540 raise FatalError("Permission denied:"
1541 + " No write access to directory '%s'." % ctx.tmpdir)
1542 if e.errno == errno.EEXIST:
1543 raise FatalError(
1544 "cvs2svn is using directory '%s' for temporary files, but\n"
1545 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
1546 " cvs2svn process is currently using '%s' as its temporary\n"
1547 " workspace. If you are certain that is not the case,\n"
1548 " then remove the '%s/cvs2svn.lock' subdirectory."
1549 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
1550 raise
1551 try:
1552 if profiling:
1553 import hotshot
1554 prof = hotshot.Profile('cvs2svn.hotshot')
1555 prof.runcall(pass_manager.run, start_pass, end_pass)
1556 prof.close()
1557 else:
1558 pass_manager.run(start_pass, end_pass)
1559 finally:
1560 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
1561 except: pass
1564 if __name__ == '__main__':
1565 try:
1566 main()
1567 except FatalException, e:
1568 sys.stderr.write(str(e))
1569 sys.exit(1)