Use CVSRevision.deltatext_exists instead of deltatext_code.
[cvs2svn.git] / cvs2svn
blob009ce272c561e5ace43d8929bd7c7c5be39f0a9e
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 from __future__ import generators
24 import os
25 import sys
26 import sha
27 import re
28 import time
29 import fileinput
30 import getopt
31 import marshal
32 import errno
33 import types
35 try:
36 # Try to get access to a bunch of encodings for use with --encoding.
37 # See http://cjkpython.i18n.org/ for details.
38 import iconv_codec
39 except ImportError:
40 pass
42 import cvs2svn_rcsparse
44 from cvs2svn_lib.boolean import *
46 from cvs2svn_lib import config
48 from cvs2svn_lib import common
49 from cvs2svn_lib.common import \
50 warning_prefix, \
51 error_prefix, \
52 FatalException, \
53 FatalError
55 from cvs2svn_lib.log import Log
57 from cvs2svn_lib.process import \
58 run_command, \
59 CommandFailedException, \
60 check_command_runs
62 from cvs2svn_lib import database
63 from cvs2svn_lib.context import Ctx
64 from cvs2svn_lib.artifact_manager import artifact_manager
65 from cvs2svn_lib.stats_keeper import StatsKeeper
66 from cvs2svn_lib import key_generator
67 from cvs2svn_lib import cvs_revision
68 from cvs2svn_lib import cvs_repository
69 from cvs2svn_lib import property_setters
70 from cvs2svn_lib.svn_revision_range import SVNRevisionRange
71 from cvs2svn_lib.tags_database import TagsDatabase
72 from cvs2svn_lib.cvs_revision_database import CVSRevisionDatabase
73 from cvs2svn_lib.openings_closings import \
74 OpeningsClosingsMap, \
75 SymbolingsLogger
76 from cvs2svn_lib.fill_source import FillSource
77 from cvs2svn_lib.last_symbolic_name_database import LastSymbolicNameDatabase
78 from cvs2svn_lib.symbol_database import SymbolDatabase
79 from cvs2svn_lib.project import Project
80 from cvs2svn_lib import collect_data
81 from cvs2svn_lib.symbolings_reader import SymbolingsReader
82 from cvs2svn_lib.svn_commit_item import SVNCommitItem
83 from cvs2svn_lib.svn_commit import SVNCommit
84 from cvs2svn_lib.svn_repository_mirror import \
85 SVNRepositoryMirror, \
86 SVNRepositoryMirrorDelegate
87 from cvs2svn_lib.dumpfile_delegate import DumpfileDelegate
88 from cvs2svn_lib.repository_delegate import RepositoryDelegate
89 from cvs2svn_lib.stdout_delegate import StdoutDelegate
90 from cvs2svn_lib.persistence_manager import PersistenceManager
# Make sure this Python is recent enough.
if sys.hexversion < 0x02020000:
    sys.stderr.write("'%s: Python 2.2 or higher required, "
                     "see www.python.org.\n" % error_prefix)
    sys.exit(1)

# A resync-file line is '<8 hex digits> <sha hex digest> <8 hex digits>'
# (see read_resync() in Pass2); this is the index just past the end of
# the digest field (8 time digits + 1 space + 2 hex chars per digest byte).
DIGEST_END_IDX = 9 + (sha.digestsize * 2)

# Matches any control character (0x00-0x1f and 0x7f); Subversion cannot
# represent filenames containing such characters.
ctrl_characters_regexp = re.compile('[\\\x00-\\\x1f\\\x7f]')
def verify_filename_legal(filename):
    """Raise a FatalError if FILENAME contains any control characters.

    Subversion cannot represent filenames containing control
    characters, so the conversion must abort on such names."""

    match = ctrl_characters_regexp.search(filename)
    if match is None:
        return
    raise FatalError(
        "Character %r in filename %r is not supported by subversion."
        % (match.group(), filename,))
def sort_file(infilename, outfilename):
    """Sort file INFILENAME, storing the results to OUTFILENAME."""

    # GNU sort orders our dates differently (incorrectly!) under any
    # locale other than 'C', so force LC_ALL to 'C' while sorting and
    # restore the caller's setting afterwards.
    saved_lc_all = os.environ.get('LC_ALL', None)
    os.environ['LC_ALL'] = 'C'
    try:
        # The -T option to sort has a nice side effect.  The Win32 sort
        # is case insensitive and cannot be used, and since it does not
        # understand the -T option and dies if we try to use it, there
        # is no risk that we use that sort by accident.
        run_command('sort -T %s %s > %s'
                    % (Ctx().tmpdir, infilename, outfilename))
    finally:
        # Restore the original locale setting exactly as we found it.
        if saved_lc_all is None:
            del os.environ['LC_ALL']
        else:
            os.environ['LC_ALL'] = saved_lc_all
class CVSCommit:
    """Each instance of this class contains a number of CVS Revisions
    that correspond to one or more Subversion Commits.  After all CVS
    Revisions are added to the grouping, calling process_revisions will
    generate a Subversion Commit (or Commits) for the set of CVS
    Revisions in the grouping."""

    def __init__(self, digest, author, log):
        # DIGEST identifies the (author, log message) pair shared by all
        # revisions in this commit; AUTHOR and LOG are that metadata.
        self.digest = digest
        self.author = author
        self.log = log

        # Symbolic names for which the last source revision has already
        # been seen and for which the CVSRevisionAggregator has already
        # generated a fill SVNCommit.  See self.process_revisions().
        self.done_symbols = [ ]

        # Map { filename : 1 } of files affected by this commit (see
        # has_file()).
        self.files = { }
        # Lists of CVSRevisions
        self.changes = [ ]
        self.deletes = [ ]

        # Start out with a t_min higher than any incoming time T, and a
        # t_max lower than any incoming T.  This way the first T will
        # push t_min down to T, and t_max up to T, naturally (without any
        # special-casing), and successive times will then ratchet them
        # outward as appropriate.
        self.t_min = 1L<<32
        self.t_max = 0

        # This will be set to the SVNCommit that occurs in self._commit.
        self.motivating_commit = None

        # This is a list of all non-primary commits motivated by the main
        # commit.  We gather these so that we can set their dates to the
        # same date as the primary commit.
        self.secondary_commits = [ ]

        # State for handling default branches.
        #
        # Here is a tempting, but ultimately nugatory, bit of logic, which
        # I share with you so you may appreciate the less attractive, but
        # refreshingly non-nugatory, logic which follows it:
        #
        # If some of the commits in this txn happened on a non-trunk
        # default branch, then those files will have to be copied into
        # trunk manually after being changed on the branch (because the
        # RCS "default branch" appears as head, i.e., trunk, in practice).
        # As long as those copies don't overwrite any trunk paths that
        # were also changed in this commit, then we can do the copies in
        # the same revision, because they won't cover changes that don't
        # appear anywhere/anywhen else.  However, if some of the trunk dst
        # paths *did* change in this commit, then immediately copying the
        # branch changes would lose those trunk mods forever.  So in this
        # case, we need to do at least that copy in its own revision.  And
        # for simplicity's sake, if we're creating the new revision for
        # even one file, then we just do all such copies together in the
        # new revision.
        #
        # Doesn't that sound nice?
        #
        # Unfortunately, Subversion doesn't support copies with sources
        # in the current txn.  All copies must be based in committed
        # revisions.  Therefore, we generate the above-described new
        # revision unconditionally.
        #
        # This is a list of c_revs, and a c_rev is appended for each
        # default branch commit that will need to be copied to trunk (or
        # deleted from trunk) in some generated revision following the
        # "regular" revision.
        self.default_branch_cvs_revisions = [ ]

    def __cmp__(self, other):
        # Commits should be sorted by t_max.  If both self and other have
        # the same t_max, break the tie using t_min, and lastly, digest.
        # If all those are equal, then compare based on ids, to ensure
        # that no two instances compare equal.
        return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
                or cmp(self.digest, other.digest) or cmp(id(self), id(other)))

    def __hash__(self):
        # Hash by identity, consistent with the id() tie-break in __cmp__.
        return id(self)

    def has_file(self, fname):
        """Return true if FNAME is already affected by this commit."""
        return self.files.has_key(fname)

    def revisions(self):
        """Return all CVSRevisions in this commit (changes, then deletes)."""
        return self.changes + self.deletes

    def opens_symbolic_name(self, name):
        """Return True if any CVSRevision in this commit is on a tag or a
        branch or is the origin of a tag or branch."""

        for c_rev in self.revisions():
            if c_rev.opens_symbolic_name(name):
                return True
        return False

    def add_revision(self, c_rev):
        """Add C_REV to this commit, widening the commit's time range to
        include C_REV's timestamp."""

        # Record the time range of this commit.
        #
        # ### ISSUE: It's possible, though unlikely, that the time range
        # of a commit could get gradually expanded to be arbitrarily
        # longer than COMMIT_THRESHOLD.  I'm not sure this is a huge
        # problem, and anyway deciding where to break it up would be a
        # judgement call.  For now, we just print a warning in commit() if
        # this happens.
        if c_rev.timestamp < self.t_min:
            self.t_min = c_rev.timestamp
        if c_rev.timestamp > self.t_max:
            self.t_max = c_rev.timestamp

        if c_rev.op == common.OP_DELETE:
            self.deletes.append(c_rev)
        else:
            # OP_CHANGE or OP_ADD
            self.changes.append(c_rev)

        self.files[c_rev.fname] = 1

    def _pre_commit(self):
        """Generates any SVNCommits that must exist before the main commit."""

        # There may be multiple c_revs in this commit that would cause
        # branch B to be filled, but we only want to fill B once.  On the
        # other hand, there might be multiple branches committed on in
        # this commit.  Whatever the case, we should count exactly one
        # commit per branch, because we only fill a branch once per
        # CVSCommit.  This list tracks which branches we've already
        # counted.
        accounted_for_sym_names = [ ]

        def fill_needed(c_rev, pm):
            """Return 1 if this is the first commit on a new branch (for
            this file) and we need to fill the branch; else return 0
            (meaning that some other file's first commit on the branch has
            already done the fill for us).

            If C_REV.op is OP_ADD, only return 1 if the branch that this
            commit is on has no last filled revision.

            PM is a PersistenceManager to query."""

            # Different '.' counts indicate that c_rev is now on a different
            # line of development (and may need a fill)
            if c_rev.rev.count('.') != c_rev.prev_rev.rev.count('.'):
                svn_revnum = pm.get_svn_revnum(c_rev.prev_rev.unique_key())
                # It should be the case that when we have a file F that
                # is added on branch B (thus, F on trunk is in state
                # 'dead'), we generate an SVNCommit to fill B iff the branch
                # has never been filled before.
                #
                # If this c_rev.op == OP_ADD, *and* the branch has never
                # been filled before, then fill it now.  Otherwise, no need
                # to fill it.
                if c_rev.op == common.OP_ADD:
                    if pm.last_filled.get(c_rev.branch_name, None) is None:
                        return 1
                elif c_rev.op == common.OP_CHANGE:
                    # A change needs a fill only if the branch has not been
                    # filled since the previous revision was committed.
                    if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
                        return 1
                elif c_rev.op == common.OP_DELETE:
                    if pm.last_filled.get(c_rev.branch_name, None) is None:
                        return 1
            return 0

        for c_rev in self.changes + self.deletes:
            # If a commit is on a branch, we must ensure that the branch
            # path being committed exists (in HEAD of the Subversion
            # repository).  If it doesn't exist, we will need to fill the
            # branch.  After the fill, the path on which we're committing
            # will exist.
            if c_rev.branch_name \
                   and c_rev.branch_name not in accounted_for_sym_names \
                   and c_rev.branch_name not in self.done_symbols \
                   and fill_needed(c_rev, Ctx()._persistence_manager):
                svn_commit = SVNCommit("pre-commit symbolic name '%s'"
                                       % c_rev.branch_name)
                svn_commit.set_symbolic_name(c_rev.branch_name)
                self.secondary_commits.append(svn_commit)
                accounted_for_sym_names.append(c_rev.branch_name)

    def _commit(self):
        """Generates the primary SVNCommit that corresponds to this
        CVSCommit."""

        # Generate an SVNCommit unconditionally.  Even if the only change
        # in this CVSCommit is a deletion of an already-deleted file (that
        # is, a CVS revision in state 'dead' whose predecessor was also in
        # state 'dead'), the conversion will still generate a Subversion
        # revision containing the log message for the second dead
        # revision, because we don't want to lose that information.
        svn_commit = SVNCommit("commit")
        self.motivating_commit = svn_commit

        for c_rev in self.changes:
            svn_commit.add_revision(c_rev)
            # Only make a change if we need to:
            if c_rev.rev == "1.1.1.1" and not c_rev.deltatext_exists:
                # When 1.1.1.1 has an empty deltatext, the explanation is
                # almost always that we're looking at an imported file whose
                # 1.1 and 1.1.1.1 are identical.  On such imports, CVS creates
                # an RCS file where 1.1 has the content, and 1.1.1.1 has an
                # empty deltatext, i.e, the same content as 1.1.  There's no
                # reason to reflect this non-change in the repository, so we
                # want to do nothing in this case.  (If we were really
                # paranoid, we could make sure 1.1's log message is the
                # CVS-generated "Initial revision\n", but I think the
                # conditions above are strict enough.)
                pass
            else:
                if c_rev.is_default_branch_revision():
                    self.default_branch_cvs_revisions.append(c_rev)

        for c_rev in self.deletes:
            # When a file is added on a branch, CVS not only adds the file
            # on the branch, but generates a trunk revision (typically
            # 1.1) for that file in state 'dead'.  We only want to add
            # this revision if the log message is not the standard cvs
            # fabricated log message.
            if c_rev.prev_rev is None:
                # c_rev.branches may be empty if the originating branch
                # has been excluded.
                if not c_rev.branches:
                    continue
                cvs_generated_msg = ('file %s was initially added on branch %s.\n'
                                     % (c_rev.filename(),
                                        c_rev.branches[0]))
                author, log_msg = \
                    Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
                if log_msg == cvs_generated_msg:
                    # Skip the CVS-fabricated dead trunk revision.
                    continue

            svn_commit.add_revision(c_rev)
            if c_rev.is_default_branch_revision():
                self.default_branch_cvs_revisions.append(c_rev)

        # There is a slight chance that we didn't actually register any
        # CVSRevisions with our SVNCommit (see loop over self.deletes
        # above), so if we have no CVSRevisions, we don't flush the
        # svn_commit to disk and roll back our revnum.
        if len(svn_commit.cvs_revs) > 0:
            svn_commit.flush()
        else:
            # We will not be flushing this SVNCommit, so rollback the
            # SVNCommit revision counter.
            SVNCommit.revnum -= 1

        if not Ctx().trunk_only:
            for c_rev in self.revisions():
                Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)

    def _post_commit(self):
        """Generates any SVNCommits that we can perform now that _commit
        has happened.  That is, handle non-trunk default branches.
        Sometimes an RCS file has a non-trunk default branch, so a commit
        on that default branch would be visible in a default CVS checkout
        of HEAD.  If we don't copy that commit over to Subversion's trunk,
        then there will be no Subversion tree which corresponds to that
        CVS checkout.  Of course, in order to copy the path over, we may
        first need to delete the existing trunk there."""

        # Only generate a commit if we have default branch revs
        if len(self.default_branch_cvs_revisions):
            # Generate an SVNCommit for all of our default branch c_revs.
            svn_commit = SVNCommit("post-commit default branch(es)")
            svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
            for c_rev in self.default_branch_cvs_revisions:
                svn_commit.add_revision(c_rev)
                Ctx()._symbolings_logger.log_default_branch_closing(
                    c_rev, svn_commit.revnum)
            self.secondary_commits.append(svn_commit)

    def process_revisions(self, done_symbols):
        """Process all the CVSRevisions that this instance has, creating
        one or more SVNCommits in the process.  Generate fill SVNCommits
        only for symbols not in DONE_SYMBOLS (avoids unnecessary
        fills).

        Return the primary SVNCommit that corresponds to this CVSCommit.
        The returned SVNCommit is the commit that motivated any other
        SVNCommits generated in this CVSCommit."""

        self.done_symbols = done_symbols
        seconds = self.t_max - self.t_min + 1

        Log().write(Log.VERBOSE, '-' * 60)
        Log().write(Log.VERBOSE, 'CVS Revision grouping:')
        if seconds == 1:
            Log().write(Log.VERBOSE, ' Start time: %s (duration: 1 second)'
                        % time.ctime(self.t_max))
        else:
            Log().write(Log.VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
            Log().write(Log.VERBOSE, ' End time: %s (duration: %d seconds)'
                        % (time.ctime(self.t_max), seconds))

        if seconds > config.COMMIT_THRESHOLD + 1:
            # See the ### ISSUE note in add_revision(): the ratcheting of
            # t_min/t_max can stretch a grouping past the threshold.
            Log().write(Log.WARN, '%s: grouping spans more than %d seconds'
                        % (warning_prefix, config.COMMIT_THRESHOLD))

        if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
            self._commit()
            return self.motivating_commit

        self._pre_commit()
        self._commit()
        self._post_commit()

        # Secondary commits inherit the primary commit's date.
        for svn_commit in self.secondary_commits:
            svn_commit.set_date(self.motivating_commit.get_date())
            svn_commit.flush()

        return self.motivating_commit
class CVSRevisionAggregator:
    """This class groups CVSRevisions into CVSCommits that represent
    at least one SVNCommit."""

    def __init__(self):
        # Read-only map: digest -> (author, log message).
        self.metadata_db = database.Database(
            artifact_manager.get_temp_file(config.METADATA_DB),
            database.DB_OPEN_READ)
        if not Ctx().trunk_only:
            # Maps a CVSRevision's unique key to the symbolic names for
            # which that revision is the last source revision.
            self.last_revs_db = database.Database(
                artifact_manager.get_temp_file(config.SYMBOL_LAST_CVS_REVS_DB),
                database.DB_OPEN_READ)

        # A map { key : CVSCommit } of CVS commits currently being
        # accumulated.  If the CVSCommit is still open to further
        # CVSRevisions, then key is CVSRevision.digest.  If not (because
        # an inbound commit wanted to affect a file that was already
        # within the CVSCommit), then key is CVSRevision.digest plus some
        # number of appended '-'.
        self.cvs_commits = {}

        # List of ready commits.
        self.ready_queue = [ ]

        # A map { symbol : None } of symbolic names for which the last
        # source CVSRevision has already been processed but which haven't
        # been closed yet.
        self.pending_symbols = {}

        # A list of closed symbols.  That is, we've already encountered
        # the last CVSRevision that is a source for that symbol, the final
        # fill for this symbol has been done, and we never need to fill it
        # again.
        self.done_symbols = [ ]

        # This variable holds the most recently created primary svn_commit
        # object.  CVSRevisionAggregator maintains this variable merely
        # for its date, so that it can set dates for the SVNCommits
        # created in self._attempt_to_commit_symbols().
        self.latest_primary_svn_commit = None

        # Shared state published on the Ctx singleton for other classes
        # (e.g. CVSCommit) to use during this pass.
        Ctx()._symbolings_logger = SymbolingsLogger()
        Ctx()._persistence_manager = PersistenceManager(database.DB_OPEN_NEW)
        Ctx()._default_branches_db = database.SDatabase(
            artifact_manager.get_temp_file(config.DEFAULT_BRANCHES_DB),
            database.DB_OPEN_READ)

    def _extract_ready_commits(self, timestamp):
        """Move any active commits that expire by TIMESTAMP from
        self.cvs_commits to self.ready_queue."""

        # items() returns a snapshot list in Python 2, so deleting from
        # the dict inside the loop is safe.
        for digest_key, cvs_commit in self.cvs_commits.items():
            if cvs_commit.t_max + config.COMMIT_THRESHOLD < timestamp:
                self.ready_queue.append(cvs_commit)
                del self.cvs_commits[digest_key]

    def _commit_ready_commits(self):
        """Sort the commits from self.ready_queue by time, then process them."""

        self.ready_queue.sort()
        while self.ready_queue:
            cvs_commit = self.ready_queue[0]
            del self.ready_queue[0]
            self.latest_primary_svn_commit = \
                cvs_commit.process_revisions(self.done_symbols)
            self._attempt_to_commit_symbols()

    def process_revision(self, c_rev):
        """Incorporate C_REV into an open (or new) CVSCommit, flushing any
        commits that can no longer accept revisions."""

        # Each time we read a new line, scan the accumulating commits to
        # see if any are ready for processing.
        self._extract_ready_commits(c_rev.timestamp)

        for digest_key, cvs_commit in self.cvs_commits.items():
            # If the inbound commit is on the same file as a pending commit,
            # close the pending commit to further changes.  Don't flush it
            # though, as there may be other pending commits dated before
            # this one.
            # ### ISSUE: the has_file() check below is not optimal.
            # It does fix the dataloss bug where revisions would get lost
            # if checked in too quickly, but it can also break apart the
            # commits.  The correct fix would require tracking the
            # dependencies between change sets and committing them in
            # proper order.
            if cvs_commit.has_file(c_rev.fname):
                unused_id = digest_key + '-'
                # Find a string that is not already a key in
                # the self.cvs_commits dict
                while self.cvs_commits.has_key(unused_id):
                    unused_id += '-'
                self.cvs_commits[unused_id] = cvs_commit
                del self.cvs_commits[digest_key]

        # Add this item into the set of still-available commits.
        if self.cvs_commits.has_key(c_rev.digest):
            cvs_commit = self.cvs_commits[c_rev.digest]
        else:
            author, log = self.metadata_db[c_rev.digest]
            cvs_commit = CVSCommit(c_rev.digest, author, log)
            self.cvs_commits[c_rev.digest] = cvs_commit
        cvs_commit.add_revision(c_rev)

        # Any elements in self.ready_queue at this point need to be
        # processed, because this latest rev couldn't possibly be part of
        # any of them.
        self._commit_ready_commits()

        self._add_pending_symbols(c_rev)

    def flush(self):
        """Commit anything left in self.cvs_commits.  Then inform the
        SymbolingsLogger that all commits are done."""

        # 1L<<32 is later than any real timestamp, so every pending
        # commit expires and gets processed.
        self._extract_ready_commits(1L<<32)
        self._commit_ready_commits()

        if not Ctx().trunk_only:
            Ctx()._symbolings_logger.close()

    def _add_pending_symbols(self, c_rev):
        """Add to self.pending_symbols any symbols from C_REV for which
        C_REV is the last CVSRevision.

        If we're not doing a trunk-only conversion, get the symbolic names
        that this c_rev is the last *source* CVSRevision for and add them
        to those left over from previous passes through the aggregator."""

        if not Ctx().trunk_only:
            for sym in self.last_revs_db.get(c_rev.unique_key(), []):
                self.pending_symbols[sym] = None

    def _attempt_to_commit_symbols(self):
        """Generate one SVNCommit for each symbol in self.pending_symbols
        that doesn't have an opening CVSRevision in either self.ready_queue
        or self.cvs_commits.values()."""

        # Make a list of all symbols from self.pending_symbols that do not
        # have *source* CVSRevisions in the pending commit queues
        # (self.cvs_commits or self.ready_queue):
        closeable_symbols = []
        pending_commits = self.cvs_commits.values() + self.ready_queue
        for sym in self.pending_symbols:
            for cvs_commit in pending_commits:
                if cvs_commit.opens_symbolic_name(sym):
                    break
            else:
                # No pending commit opens this symbol, so it can be closed.
                closeable_symbols.append(sym)

        # Sort the closeable symbols so that we will always process the
        # symbols in the same order, regardless of the order in which the
        # dict hashing algorithm hands them back to us.  We do this so
        # that our tests will get the same results on all platforms.
        closeable_symbols.sort()
        for sym in closeable_symbols:
            svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
            svn_commit.set_symbolic_name(sym)
            svn_commit.set_date(self.latest_primary_svn_commit.get_date())
            svn_commit.flush()
            self.done_symbols.append(sym)
            del self.pending_symbols[sym]
class Pass:
    """Abstract base class for one step of the conversion.

    Concrete subclasses must override register_artifacts() and run()."""

    def register_artifacts(self):
        """Register artifacts (created and needed) in artifact_manager.

        Subclasses must override this method."""

        raise NotImplementedError

    def run(self):
        """Carry out this step of the conversion.

        Subclasses must override this method."""

        raise NotImplementedError

    def _register_temp_file(self, basename):
        """Record that this pass creates temporary file BASENAME."""

        artifact_manager.register_temp_file(basename, self)

    def _register_temp_file_needed(self, basename):
        """Record that this pass consumes temporary file BASENAME."""

        artifact_manager.register_temp_file_needed(basename, self)
class Pass1(Pass):
    """Pass 1: examine every ',v' file in the CVS repository and collect
    the raw revision data into temporary files."""

    def register_artifacts(self):
        # Temp files produced by this pass (consumed by later passes).
        self._register_temp_file(config.TAGS_LIST)
        self._register_temp_file(config.BRANCHES_LIST)
        self._register_temp_file(config.REVS_DATAFILE)
        self._register_temp_file(config.RESYNC_DATAFILE)
        self._register_temp_file(config.DEFAULT_BRANCHES_DB)
        self._register_temp_file(config.METADATA_DB)

    def run(self):
        """Walk the CVS repository, parsing every ',v' file; raise
        FatalException if any fatal errors were accumulated or if no RCS
        files were found at all."""

        OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
        Log().write(Log.QUIET, "Examining all CVS ',v' files...")
        cd = collect_data.CollectData()

        def visit_file(baton, dirname, files):
            # Callback for os.path.walk(); BATON is the CollectData
            # instance passed as the walk's third argument.
            cd = baton
            for fname in files:
                verify_filename_legal(fname)
                if not fname.endswith(',v'):
                    continue
                cd.found_valid_file = 1
                pathname = os.path.join(dirname, fname)
                if dirname.endswith(OS_SEP_PLUS_ATTIC):
                    # drop the 'Attic' portion from the pathname for the
                    # canonical name.
                    fdc = collect_data.FileDataCollector(
                        cd, os.path.join(dirname[:-6], fname), pathname)
                else:
                    # If this file also exists in the attic, it's a fatal error
                    attic_path = os.path.join(dirname, 'Attic', fname)
                    if os.path.exists(attic_path):
                        err = "%s: A CVS repository cannot contain both %s and %s" \
                              % (error_prefix, pathname, attic_path)
                        sys.stderr.write(err + '\n')
                        cd.fatal_errors.append(err)
                    fdc = collect_data.FileDataCollector(cd, pathname, pathname)
                Log().write(Log.NORMAL, pathname)
                try:
                    cvs2svn_rcsparse.parse(open(pathname, 'rb'), fdc)
                except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
                        RuntimeError):
                    # A malformed ,v file is recorded as a fatal error but
                    # does not stop the walk; all errors are reported at
                    # the end of the pass.
                    err = "%s: '%s' is not a valid ,v file" \
                          % (error_prefix, pathname)
                    sys.stderr.write(err + '\n')
                    cd.fatal_errors.append(err)
                except:
                    # Unexpected failure: identify the offending file
                    # before re-raising.
                    Log().write(Log.WARN,
                                "Exception occurred while parsing %s" % pathname)
                    raise

        os.path.walk(Ctx().project.project_cvs_repos_path, visit_file, cd)
        Log().write(Log.VERBOSE, 'Processed', cd.num_files, 'files')

        cd.write_symbol_db()

        if len(cd.fatal_errors) > 0:
            raise FatalException("Pass 1 complete.\n"
                                 + "=" * 75 + "\n"
                                 + "Error summary:\n"
                                 + "\n".join(cd.fatal_errors) + "\n"
                                 + "Exited due to fatal error(s).\n")

        if cd.found_valid_file is None:
            raise FatalException(
                "\n"
                "No RCS files found in your CVS Repository!\n"
                "Are you absolutely certain you are pointing cvs2svn\n"
                "at a CVS repository?\n"
                "\n"
                "Exited due to fatal error(s).\n")

        StatsKeeper().reset_c_rev_info()
        StatsKeeper().archive()
        Log().write(Log.QUIET, "Done")
710 class Pass2(Pass):
711 """Pass 2: clean up the revision information."""
713 def register_artifacts(self):
714 self._register_temp_file(config.TAGS_DB)
715 self._register_temp_file(config.CLEAN_REVS_DATAFILE)
716 self._register_temp_file(config.TWEAKED_TIMESTAMPS_DB)
717 self._register_temp_file_needed(config.TAGS_LIST)
718 self._register_temp_file_needed(config.BRANCHES_LIST)
719 self._register_temp_file_needed(config.REVS_DATAFILE)
720 self._register_temp_file_needed(config.RESYNC_DATAFILE)
722 def run(self):
723 symbol_db = SymbolDatabase()
724 symbol_db.read()
726 # Convert the list of regexps to a list of strings
727 excludes = symbol_db.find_excluded_symbols(Ctx().excludes)
729 error_detected = 0
731 Log().write(Log.QUIET, "Checking for blocked exclusions...")
732 blocked_excludes = symbol_db.find_blocked_excludes(excludes)
733 if blocked_excludes:
734 for branch, blockers in blocked_excludes.items():
735 sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
736 "excluded because the following symbols depend "
737 "on it:\n" % (branch))
738 for blocker in blockers:
739 sys.stderr.write(" '%s'\n" % (blocker))
740 sys.stderr.write("\n")
741 error_detected = 1
743 Log().write(Log.QUIET, "Checking for forced tags with commits...")
744 invalid_forced_tags = [ ]
745 for forced_tag in Ctx().forced_tags:
746 if excludes.has_key(forced_tag):
747 continue
748 if symbol_db.branch_has_commit(forced_tag):
749 invalid_forced_tags.append(forced_tag)
750 if invalid_forced_tags:
751 sys.stderr.write(error_prefix + ": The following branches cannot be "
752 "forced to be tags because they have commits:\n")
753 for tag in invalid_forced_tags:
754 sys.stderr.write(" '%s'\n" % (tag))
755 sys.stderr.write("\n")
756 error_detected = 1
758 Log().write(Log.QUIET, "Checking for tag/branch mismatches...")
759 mismatches = symbol_db.find_mismatches(excludes)
760 def is_not_forced(mismatch):
761 name = mismatch[0]
762 return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
763 mismatches = filter(is_not_forced, mismatches)
764 if mismatches:
765 sys.stderr.write(error_prefix + ": The following symbols are tags "
766 "in some files and branches in others.\nUse "
767 "--force-tag, --force-branch and/or --exclude to "
768 "resolve the symbols.\n")
769 for name, tag_count, branch_count, commit_count in mismatches:
770 sys.stderr.write(" '%s' is a tag in %d files, a branch in "
771 "%d files and has commits in %d files.\n"
772 % (name, tag_count, branch_count, commit_count))
773 error_detected = 1
775 # Bail out now if we found errors
776 if error_detected:
777 sys.exit(1)
779 # Create the tags database
780 tags_db = TagsDatabase(database.DB_OPEN_NEW)
781 for tag in symbol_db.tags:
782 if tag not in Ctx().forced_branches:
783 tags_db.add(tag)
784 for tag in Ctx().forced_tags:
785 tags_db.add(tag)
787 Log().write(Log.QUIET, "Re-synchronizing CVS revision timestamps...")
789 # We may have recorded some changes in revisions' timestamp. We need to
790 # scan for any other files which may have had the same log message and
791 # occurred at "the same time" and change their timestamps, too.
793 # read the resync data file
794 def read_resync(fname):
795 """Read the .resync file into memory."""
797 ### note that we assume that we can hold the entire resync file in
798 ### memory. really large repositories with whacky timestamps could
799 ### bust this assumption. should that ever happen, then it is possible
800 ### to split the resync file into pieces and make multiple passes,
801 ### using each piece.
804 # A digest maps to a sequence of lists which specify a lower and upper
805 # time bound for matching up the commit. We keep a sequence of these
806 # because a number of checkins with the same log message (e.g. an empty
807 # log message) could need to be remapped. We also make them a list
808 # because we will dynamically expand the lower/upper bound as we find
809 # commits that fall into a particular msg and time range.
811 # resync == digest -> [[old_time_lower, old_time_upper, new_time], ...]
813 resync = { }
815 for line in fileinput.FileInput(fname):
816 t1 = int(line[:8], 16)
817 digest = line[9:DIGEST_END_IDX]
818 t2 = int(line[DIGEST_END_IDX+1:], 16)
819 t1_l = t1 - config.COMMIT_THRESHOLD/2
820 t1_u = t1 + config.COMMIT_THRESHOLD/2
821 resync.setdefault(digest, []).append([t1_l, t1_u, t2])
823 # For each digest, sort the resync items in it in increasing order,
824 # based on the lower time bound.
825 for val in resync.values():
826 val.sort()
828 return resync
# Apply the resync data: rewrite the revisions file, remapping the
# timestamp of any revision that falls inside a resync window, and
# record each tweak so later revisions of the same file see it.
resync = read_resync(
    artifact_manager.get_temp_file(config.RESYNC_DATAFILE))

output = open(artifact_manager.get_temp_file(config.CLEAN_REVS_DATAFILE),
              'w')

# Maps CVSRevision.unique_key() -> new timestamp, for revisions whose
# times were tweaked in this loop.
tweaked_timestamps_db = database.Database(
    artifact_manager.get_temp_file(config.TWEAKED_TIMESTAMPS_DB),
    database.DB_OPEN_NEW)

# process the revisions file, looking for items to clean up
for line in fileinput.FileInput(
    artifact_manager.get_temp_file(config.REVS_DATAFILE)):
  c_rev = cvs_revision.parse_cvs_revision(Ctx(), line[:-1])

  # Skip this entire revision if it's on an excluded branch
  if excludes.has_key(c_rev.branch_name):
    continue

  # If the previous revision of this file was itself tweaked, use the
  # tweaked time as our notion of its timestamp:
  new_prev_ts = None
  if c_rev.prev_rev is not None:
    new_prev_ts = tweaked_timestamps_db.get(
        c_rev.prev_rev.unique_key(), None)
    if new_prev_ts:
      c_rev.prev_timestamp = new_prev_ts

  # Likewise for the next revision:
  new_next_ts = None
  if c_rev.next_rev is not None:
    new_next_ts = tweaked_timestamps_db.get(
        c_rev.next_rev.unique_key(), None)
    if new_next_ts:
      c_rev.next_timestamp = new_next_ts

  # Remove all references to excluded tags and branches
  def not_excluded(symbol, excludes=excludes):
    return not excludes.has_key(symbol)
  c_rev.branches = filter(not_excluded, c_rev.branches)
  c_rev.tags = filter(not_excluded, c_rev.tags)

  # Convert all branches that are forced to be tags
  for forced_tag in Ctx().forced_tags:
    if forced_tag in c_rev.branches:
      c_rev.branches.remove(forced_tag)
      c_rev.tags.append(forced_tag)

  # Convert all tags that are forced to be branches
  for forced_branch in Ctx().forced_branches:
    if forced_branch in c_rev.tags:
      c_rev.tags.remove(forced_branch)
      c_rev.branches.append(forced_branch)

  # see if this is "near" any of the resync records we
  # have recorded for this digest [of the log message].
  for record in resync.get(c_rev.digest, []):
    if record[2] == c_rev.timestamp:
      # This means that either c_rev is the same revision that
      # caused the resync record to exist, or c_rev is a different
      # CVS revision that happens to have the same timestamp. In
      # either case, we don't have to do anything, so we...
      continue

    if record[0] <= c_rev.timestamp <= record[1]:
      # bingo! We probably want to remap the time on this c_rev,
      # unless the remapping would be useless because the new time
      # would fall outside the COMMIT_THRESHOLD window for this
      # commit group.
      new_timestamp = record[2]
      # If the new timestamp is earlier than that of our previous revision
      if new_timestamp < c_rev.prev_timestamp:
        # BUGFIX: this message previously read "... which is before
        # previous the time of revision ..." (garbled word order).
        desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                + " to time %s, which is before the time of previous"
                + " revision %s (%s):")
        Log().write(Log.WARN, desc % (warning_prefix, c_rev.rev,
                                      c_rev.cvs_path, new_timestamp,
                                      c_rev.prev_rev.rev,
                                      c_rev.prev_timestamp))
        # If resyncing our rev to c_rev.prev_timestamp + 1 will place
        # the timestamp of c_rev within COMMIT_THRESHOLD of the
        # attempted resync time, then sync back to c_rev.prev_timestamp
        # + 1...
        if ((c_rev.prev_timestamp + 1) - new_timestamp) \
               < config.COMMIT_THRESHOLD:
          new_timestamp = c_rev.prev_timestamp + 1
          Log().write(Log.WARN, "%s: Time set to %s" % (warning_prefix,
                                                        new_timestamp))
        else:
          Log().write(Log.WARN, "%s: Timestamp left untouched" %
                      warning_prefix)
          continue

      # If the new timestamp is later than that of our next revision
      elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
        desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                + " to time %s, which is after time of next"
                + " revision %s (%s):")
        Log().write(Log.WARN, desc % (warning_prefix, c_rev.rev,
                                      c_rev.cvs_path, new_timestamp,
                                      c_rev.next_rev.rev,
                                      c_rev.next_timestamp))
        # If resyncing our rev to c_rev.next_timestamp - 1 will place
        # the timestamp of c_rev within COMMIT_THRESHOLD of the
        # attempted resync time, then sync forward to c_rev.next_timestamp
        # - 1...
        if (new_timestamp - (c_rev.next_timestamp - 1)) \
               < config.COMMIT_THRESHOLD:
          new_timestamp = c_rev.next_timestamp - 1
          Log().write(Log.WARN, "%s: Time set to %s" % (warning_prefix,
                                                        new_timestamp))
        else:
          Log().write(Log.WARN, "%s: Timestamp left untouched" %
                      warning_prefix)
          continue

      # Fix for Issue #71: Avoid resyncing two consecutive revisions
      # to the same timestamp.
      elif (new_timestamp == c_rev.prev_timestamp
            or new_timestamp == c_rev.next_timestamp):
        continue

      # adjust the time range. we want the COMMIT_THRESHOLD from the
      # bounds of the earlier/latest commit in this group.
      record[0] = min(record[0],
                      c_rev.timestamp - config.COMMIT_THRESHOLD/2)
      record[1] = max(record[1],
                      c_rev.timestamp + config.COMMIT_THRESHOLD/2)

      msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
            % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
               new_timestamp - c_rev.timestamp)
      Log().write(Log.VERBOSE, msg)

      c_rev.timestamp = new_timestamp
      tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp

      # stop looking for hits
      break

  output.write(str(c_rev) + "\n")
Log().write(Log.QUIET, "Done")
class Pass3(Pass):
  """Sort the cleaned-up revisions file produced by pass 2."""

  def register_artifacts(self):
    # Produces the sorted revisions file; consumes the cleaned one.
    self._register_temp_file(config.SORTED_REVS_DATAFILE)
    self._register_temp_file_needed(config.CLEAN_REVS_DATAFILE)

  def run(self):
    Log().write(Log.QUIET, "Sorting CVS revisions...")
    src = artifact_manager.get_temp_file(config.CLEAN_REVS_DATAFILE)
    dst = artifact_manager.get_temp_file(config.SORTED_REVS_DATAFILE)
    sort_file(src, dst)
    Log().write(Log.QUIET, "Done")
class Pass4(Pass):
  """Load the sorted revisions into the CVSRevisionDatabase."""

  def register_artifacts(self):
    if not Ctx().trunk_only:
      self._register_temp_file(config.SYMBOL_LAST_CVS_REVS_DB)
    self._register_temp_file(config.CVS_REVS_DB)
    self._register_temp_file_needed(config.SORTED_REVS_DATAFILE)

  def run(self):
    """Iterate through sorted revs, storing them in a database.
    If we're not doing a trunk-only conversion, generate the
    LastSymbolicNameDatabase, which contains the last CVSRevision
    that is a source for each tag or branch."""

    Log().write(Log.QUIET,
                "Copying CVS revision data from flat file to database...")
    cvs_revs_db = CVSRevisionDatabase(database.DB_OPEN_NEW)
    if not Ctx().trunk_only:
      Log().write(Log.QUIET,
                  "Finding last CVS revisions for all symbolic names...")
      last_sym_name_db = LastSymbolicNameDatabase()
    else:
      # Stand-in object so the loop below need not re-test
      # Ctx().trunk_only on every iteration.
      class _NullLSNDB:
        def log_revision(self, *args): pass
        def create_database(self, *args): pass
      last_sym_name_db = _NullLSNDB()

    sorted_revs = artifact_manager.get_temp_file(config.SORTED_REVS_DATAFILE)
    for line in fileinput.FileInput(sorted_revs):
      c_rev = cvs_revision.parse_cvs_revision(Ctx(), line[:-1])
      cvs_revs_db.log_revision(c_rev)
      last_sym_name_db.log_revision(c_rev)
      StatsKeeper().record_c_rev(c_rev)

    last_sym_name_db.create_database()
    StatsKeeper().archive()
    Log().write(Log.QUIET, "Done")
class Pass5(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  While aggregating, CVSCommit._commit also calls SymbolingsLogger to
  register CVSRevisions that represent an opening or closing for a
  path on a branch or tag.  See SymbolingsLogger for more details."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self._register_temp_file(config.SYMBOL_CLOSINGS_TMP)
    self._register_temp_file(config.SVN_REVNUMS_TO_CVS_REVS)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    # Inputs:
    if not Ctx().trunk_only:
      self._register_temp_file_needed(config.SYMBOL_LAST_CVS_REVS_DB)
    self._register_temp_file_needed(config.CVS_REVS_DB)
    self._register_temp_file_needed(config.TAGS_DB)
    self._register_temp_file_needed(config.DEFAULT_BRANCHES_DB)
    self._register_temp_file_needed(config.METADATA_DB)
    self._register_temp_file_needed(config.SORTED_REVS_DATAFILE)

  def run(self):
    Log().write(Log.QUIET, "Mapping CVS revisions to Subversion commits...")

    aggregator = CVSRevisionAggregator()
    revs_file = artifact_manager.get_temp_file(config.SORTED_REVS_DATAFILE)
    for line in fileinput.FileInput(revs_file):
      c_rev = cvs_revision.parse_cvs_revision(Ctx(), line[:-1])
      # In a trunk-only conversion, branch revisions are dropped here.
      if not Ctx().trunk_only or c_rev.branch_name is None:
        aggregator.process_revision(c_rev)
    aggregator.flush()

    StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
    StatsKeeper().archive()
    Log().write(Log.QUIET, "Done")
class Pass6(Pass):
  """Sort the symbol openings/closings file (no-op for trunk-only runs)."""

  def register_artifacts(self):
    if not Ctx().trunk_only:
      self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
      self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self):
    Log().write(Log.QUIET, "Sorting symbolic name source revisions...")

    if not Ctx().trunk_only:
      unsorted_file = artifact_manager.get_temp_file(
          config.SYMBOL_OPENINGS_CLOSINGS)
      sorted_file = artifact_manager.get_temp_file(
          config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
      sort_file(unsorted_file, sorted_file)
    Log().write(Log.QUIET, "Done")
class Pass7(Pass):
  """Record the first file offset of each symbol in the sorted symbolings."""

  def register_artifacts(self):
    if not Ctx().trunk_only:
      self._register_temp_file(config.SYMBOL_OFFSETS_DB)
      self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def run(self):
    Log().write(Log.QUIET, "Determining offsets for all symbolic names...")

    def generate_offsets_for_symbolings():
      """This function iterates through all the lines in
      SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
      SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
      where SYMBOLIC_NAME is first encountered.  This will allow us to
      seek to the various offsets in the file and sequentially read only
      the openings and closings that we need."""

      ###PERF This is a fine example of a db that can be in-memory and
      # just flushed to disk when we're done.  Later, it can just be
      # sucked back into memory.
      offsets_db = database.Database(
          artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB),
          database.DB_OPEN_NEW)

      # 'f' rather than 'file', to avoid shadowing the builtin.
      f = open(
          artifact_manager.get_temp_file(
              config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
          'r')
      prev_sym = ""
      while True:
        # Remember the offset *before* reading, so it points at the
        # start of the line we are about to parse.
        offset = f.tell()
        line = f.readline()
        if not line:
          break
        # Unpack all three fields so a malformed line still raises.
        sym, _svn_revnum, _cvs_rev_key = line.split(" ", 2)
        if sym != prev_sym:
          Log().write(Log.VERBOSE, " ", sym)
          prev_sym = sym
          offsets_db[sym] = offset

    if not Ctx().trunk_only:
      generate_offsets_for_symbolings()
    Log().write(Log.QUIET, "Done.")
class Pass8(Pass):
  """Replay the stored SVNCommits against an SVNRepositoryMirror,
  feeding the appropriate delegates (repository, dumpfile, stdout)."""

  def register_artifacts(self):
    self._register_temp_file(config.SVN_MIRROR_REVISIONS_DB)
    self._register_temp_file(config.SVN_MIRROR_NODES_DB)
    self._register_temp_file_needed(config.CVS_REVS_DB)
    self._register_temp_file_needed(config.TAGS_DB)
    self._register_temp_file_needed(config.METADATA_DB)
    self._register_temp_file_needed(config.SVN_REVNUMS_TO_CVS_REVS)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    if not Ctx().trunk_only:
      self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
      self._register_temp_file_needed(config.SYMBOL_OFFSETS_DB)

  def run(self):
    revnum = 2  # Repository initialization is 1.
    mirror = SVNRepositoryMirror()
    persistence_manager = PersistenceManager(database.DB_OPEN_READ)

    if Ctx().target:
      if not Ctx().dry_run:
        mirror.add_delegate(RepositoryDelegate())
      Log().write(Log.QUIET, "Starting Subversion Repository.")
    else:
      if not Ctx().dry_run:
        mirror.add_delegate(DumpfileDelegate())
      Log().write(Log.QUIET, "Starting Subversion Dumpfile.")

    mirror.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

    # Commit each stored SVNCommit in revision-number order until
    # there are no more.
    while True:
      svn_commit = persistence_manager.get_svn_commit(revnum)
      if not svn_commit:
        break
      mirror.commit(svn_commit)
      revnum += 1

    mirror.finish()
# The passes of the conversion, in execution order.  Pass numbers given
# by the user (the -p option) are 1-based indexes into this list.
# BUGFIX: the closing bracket of this list literal was missing.
_passes = [
    Pass1(),
    Pass2(),
    Pass3(),
    Pass4(),
    Pass5(),
    Pass6(),
    Pass7(),
    Pass8(),
    ]
def convert(start_pass, end_pass):
  """Convert a CVS repository to an SVN repository.

  Run passes START_PASS through END_PASS (1-based, inclusive) from
  _passes, timing each pass and keeping the artifact manager informed
  about which temporary files are created, used, skipped, or deferred."""

  # The statistics file belongs to the conversion as a whole:
  artifact_manager.register_temp_file(config.STATISTICS_FILE, convert)

  StatsKeeper().set_start_time(time.time())

  # Inform the artifact manager when artifacts are created and used:
  for the_pass in _passes:
    # The statistics object is needed for every pass:
    artifact_manager.register_temp_file_needed(
        config.STATISTICS_FILE, the_pass)
    the_pass.register_artifacts()

  # Tell the artifact manager about passes that are being skipped this run:
  for the_pass in _passes[0:start_pass - 1]:
    artifact_manager.pass_skipped(the_pass)

  # times[i] holds the start time of pass i+1 (== end time of pass i).
  times = [ None ] * (end_pass + 1)
  times[start_pass - 1] = time.time()
  for i in range(start_pass - 1, end_pass):
    the_pass = _passes[i]
    Log().write(Log.QUIET, '----- pass %d -----' % (i + 1))
    the_pass.run()
    times[i + 1] = time.time()
    StatsKeeper().log_duration_for_pass(times[i + 1] - times[i], i + 1)
    # Dispose of items in Ctx() not intended to live past the end of the pass
    # (Identified by exactly one leading underscore -- "_Ctx__"-prefixed
    # names are Python name-mangled attributes and are kept)
    for attr in dir(Ctx()):
      if (len(attr) > 2 and attr[0] == '_' and attr[1] != '_'
          and attr[:6] != "_Ctx__"):
        delattr(Ctx(), attr)
    StatsKeeper().set_end_time(time.time())
    # Allow the artifact manager to clean up artifacts that are no
    # longer needed:
    artifact_manager.pass_done(the_pass)

  # Tell the artifact manager about passes that are being deferred:
  for the_pass in _passes[end_pass:]:
    artifact_manager.pass_deferred(the_pass)

  Log().write(Log.QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(Log.QUIET,
                '(These are unaltered CVS repository stats and do not\n'
                ' reflect tags or branches excluded via --exclude)\n')
  Log().write(Log.NORMAL, StatsKeeper().timings())

  # The overall conversion is done:
  artifact_manager.pass_done(convert)

  # Consistency check:
  artifact_manager.check_clean()
def normalize_ttb_path(opt, path):
  """Normalize a path to be used for --trunk, --tags, or --branches.

  1. Strip leading, trailing, and duplicated '/'.
  2. Verify that the path is not empty.

  Return the normalized path.

  If the path is invalid, write an error message and exit."""

  components = path.split('/')
  normalized = common.path_join(*components)
  if normalized:
    return normalized
  raise FatalError("cannot pass an empty path to %s." % (opt,))
def usage():
  """Print the command-line usage summary for cvs2svn to stdout."""
  print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
        % os.path.basename(sys.argv[0])
  print ' --help, -h print this usage message and exit with success'
  print ' --version print the version number'
  print ' -q quiet'
  print ' -v verbose'
  print ' -s PATH path for SVN repos'
  print ' -p START[:END] start at pass START, end at pass END of %d' \
        % len(_passes)
  print ' If only START is given, run only pass START'
  print ' (implicitly enables --skip-cleanup)'
  print ' --existing-svnrepos load into existing SVN repository'
  print ' --dumpfile=PATH name of intermediate svn dumpfile'
  print ' --tmpdir=PATH directory to use for tmp data (default to cwd)'
  print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
  print ' --dry-run do not create a repository or a dumpfile;'
  print ' just print what would happen.'
  print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
  print ' (only use this if having problems with RCS)'
  print ' --svnadmin=PATH path to the svnadmin program'
  print ' --trunk-only convert only trunk commits, not tags nor branches'
  print ' --trunk=PATH path for trunk (default: %s)' \
        % Ctx().trunk_base
  print ' --branches=PATH path for branches (default: %s)' \
        % Ctx().branches_base
  print ' --tags=PATH path for tags (default: %s)' \
        % Ctx().tags_base
  print ' --no-prune don\'t prune empty directories'
  print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
  print ' --encoding=ENC encoding of paths and log messages in CVS repos'
  print ' Multiple of these options may be passed, where they'
  print ' will be treated as an ordered list of encodings to'
  print ' attempt (with "ascii" as a hardcoded last resort)'
  print ' --force-branch=NAME force NAME to be a branch'
  print ' --force-tag=NAME force NAME to be a tag'
  print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
  print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
  print ' use Python regexp and reference syntax respectively'
  print ' --username=NAME username for cvs2svn-synthesized commits'
  print ' --skip-cleanup prevent the deletion of intermediate files'
  print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
  print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
  print ' --cvs-revnums record CVS revision numbers as file properties'
  print ' --auto-props=FILE set file properties from the auto-props section'
  print ' of a file in svn config format'
  print ' --auto-props-ignore-case Ignore case when matching auto-props patterns'
  print ' --mime-types=FILE specify an apache-style mime.types file for'
  print ' setting svn:mime-type'
  print ' --eol-from-mime-type set svn:eol-style from mime type if known'
  print ' --no-default-eol don\'t set svn:eol-style to \'native\' for'
  print ' non-binary files with undetermined mime types'
  print ' --keywords-off don\'t set svn:keywords on any files (by default,'
  print ' cvs2svn sets svn:keywords on non-binary files to'
  print ' "%s")' % config.SVN_KEYWORDS_VALUE
def main():
  """Parse the command line, validate options, set up the conversion
  context, and run the requested passes.

  Raises FatalError (caught at top level) on usage or environment
  problems; exits directly via sys.exit for --help/--version/usage
  errors."""

  # Convenience var, so we don't have to keep instantiating this Borg.
  ctx = Ctx()

  profiling = None
  start_pass = 1
  end_pass = len(_passes)

  try:
    opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
                               [ "help", "create", "trunk=",
                                 "username=", "existing-svnrepos",
                                 "branches=", "tags=", "encoding=",
                                 "force-branch=", "force-tag=", "exclude=",
                                 "use-cvs", "mime-types=",
                                 "auto-props=", "auto-props-ignore-case",
                                 "eol-from-mime-type", "no-default-eol",
                                 "trunk-only", "no-prune", "dry-run",
                                 "dump-only", "dumpfile=", "tmpdir=",
                                 "svnadmin=", "skip-cleanup", "cvs-revnums",
                                 "bdb-txn-nosync", "fs-type=",
                                 "version", "profile",
                                 "keywords-off", "symbol-transform="])
  except getopt.GetoptError, e:
    sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
    usage()
    sys.exit(1)

  for opt, value in opts:
    if opt == '--version':
      print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
      sys.exit(0)
    elif opt == '-p':
      # Don't cleanup if we're doing incrementals.
      ctx.skip_cleanup = 1
      if value.find(':') > 0:
        start_pass, end_pass = map(int, value.split(':'))
      else:
        # A bare number means "run only that one pass".
        end_pass = start_pass = int(value)
      if start_pass > len(_passes) or start_pass < 1:
        raise FatalError(
            'illegal value (%d) for starting pass. Must be 1 through %d.'
            % (int(start_pass), len(_passes),))
      if end_pass < start_pass or end_pass > len(_passes):
        raise FatalError(
            'illegal value (%d) for ending pass. Must be %d through %d.'
            % (int(end_pass), int(start_pass), len(_passes),))
    elif (opt == '--help') or (opt == '-h'):
      ctx.print_help = 1
    elif opt == '-v':
      Log().log_level = Log.VERBOSE
      ctx.verbose = 1
    elif opt == '-q':
      Log().log_level = Log.QUIET
      ctx.quiet = 1
    elif opt == '-s':
      ctx.target = value
    elif opt == '--existing-svnrepos':
      ctx.existing_svnrepos = 1
    elif opt == '--dumpfile':
      ctx.dumpfile = value
    elif opt == '--tmpdir':
      ctx.tmpdir = value
    elif opt == '--use-cvs':
      ctx.use_cvs = 1
    elif opt == '--svnadmin':
      ctx.svnadmin = value
    elif opt == '--trunk-only':
      ctx.trunk_only = 1
    elif opt == '--trunk':
      ctx.trunk_base = normalize_ttb_path(opt, value)
    elif opt == '--branches':
      ctx.branches_base = normalize_ttb_path(opt, value)
    elif opt == '--tags':
      ctx.tags_base = normalize_ttb_path(opt, value)
    elif opt == '--no-prune':
      ctx.prune = None
    elif opt == '--dump-only':
      ctx.dump_only = 1
    elif opt == '--dry-run':
      ctx.dry_run = 1
    elif opt == '--encoding':
      # Insert before the last entry (the hardcoded 'ascii' fallback).
      ctx.encoding.insert(-1, value)
    elif opt == '--force-branch':
      ctx.forced_branches.append(value)
    elif opt == '--force-tag':
      ctx.forced_tags.append(value)
    elif opt == '--exclude':
      try:
        # Anchor the pattern so it must match the whole symbol name.
        ctx.excludes.append(re.compile('^' + value + '$'))
      except re.error, e:
        raise FatalError("'%s' is not a valid regexp." % (value,))
    elif opt == '--mime-types':
      ctx.mime_types_file = value
    elif opt == '--auto-props':
      ctx.auto_props_file = value
    elif opt == '--auto-props-ignore-case':
      ctx.auto_props_ignore_case = True
    elif opt == '--eol-from-mime-type':
      ctx.eol_from_mime_type = 1
    elif opt == '--no-default-eol':
      ctx.no_default_eol = 1
    elif opt == '--keywords-off':
      ctx.keywords_off = 1
    elif opt == '--username':
      ctx.username = value
    elif opt == '--skip-cleanup':
      ctx.skip_cleanup = 1
    elif opt == '--cvs-revnums':
      ctx.svn_property_setters.append(
          property_setters.CVSRevisionNumberSetter())
    elif opt == '--bdb-txn-nosync':
      ctx.bdb_txn_nosync = 1
    elif opt == '--fs-type':
      ctx.fs_type = value
    elif opt == '--create':
      sys.stderr.write(warning_prefix +
          ': The behaviour produced by the --create option is now the '
          'default,\nand passing the option is deprecated.\n')
    elif opt == '--profile':
      profiling = 1
    elif opt == '--symbol-transform':
      [pattern, replacement] = value.split(":")
      try:
        pattern = re.compile(pattern)
      except re.error, e:
        raise FatalError("'%s' is not a valid regexp." % (pattern,))
      ctx.symbol_transforms.append((pattern, replacement,))

  if ctx.print_help:
    usage()
    sys.exit(0)

  # Consistency check for options and arguments.
  if len(args) == 0:
    usage()
    sys.exit(1)

  if len(args) > 1:
    sys.stderr.write(error_prefix +
                     ": must pass only one CVS repository.\n")
    usage()
    sys.exit(1)

  cvsroot = args[0]

  if ctx.use_cvs:
    ctx.cvs_repository = cvs_repository.CVSRepositoryViaCVS(cvsroot)
  else:
    ctx.cvs_repository = cvs_repository.CVSRepositoryViaRCS(cvsroot)

  if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
    raise FatalError("must pass one of '-s' or '--dump-only'.")

  def not_both(opt1val, opt1name, opt2val, opt2name):
    # Raise FatalError if two mutually-exclusive options were both given.
    if opt1val and opt2val:
      raise FatalError("cannot pass both '%s' and '%s'."
                       % (opt1name, opt2name,))

  not_both(ctx.target, '-s',
           ctx.dump_only, '--dump-only')

  not_both(ctx.dump_only, '--dump-only',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
           ctx.existing_svnrepos, '--existing-svnrepos')

  not_both(ctx.dump_only, '--dump-only',
           ctx.bdb_txn_nosync, '--bdb-txn-nosync')

  not_both(ctx.quiet, '-q',
           ctx.verbose, '-v')

  not_both(ctx.fs_type, '--fs-type',
           ctx.existing_svnrepos, '--existing-svnrepos')

  if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
    raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
                     % ctx.fs_type)

  # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
  ctx.project = Project(ctx.cvs_repository.cvs_repos_path,
                        ctx.trunk_base, ctx.branches_base, ctx.tags_base)

  if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
    raise FatalError("the svn-repos-path '%s' is not an "
                     "existing directory." % ctx.target)

  if not ctx.dump_only and not ctx.existing_svnrepos \
     and (not ctx.dry_run) and os.path.exists(ctx.target):
    raise FatalError("the svn-repos-path '%s' exists.\n"
                     "Remove it, or pass '--existing-svnrepos'."
                     % ctx.target)

  if ctx.target and not ctx.dry_run:
    # Verify that svnadmin can be executed. The 'help' subcommand
    # should be harmless.
    try:
      check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
    except CommandFailedException, e:
      raise FatalError(
          '%s\n'
          'svnadmin could not be executed. Please ensure that it is\n'
          'installed and/or use the --svnadmin option.' % (e,))

  ctx.svn_property_setters.append(
      property_setters.ExecutablePropertySetter())

  ctx.svn_property_setters.append(
      property_setters.BinaryFileEOLStyleSetter())

  if ctx.mime_types_file:
    ctx.svn_property_setters.append(
        property_setters.MimeMapper(ctx.mime_types_file))

  if ctx.auto_props_file:
    ctx.svn_property_setters.append(
        property_setters.AutoPropsPropertySetter(
            ctx.auto_props_file, ctx.auto_props_ignore_case))

  ctx.svn_property_setters.append(
      property_setters.BinaryFileDefaultMimeTypeSetter())

  if ctx.eol_from_mime_type:
    ctx.svn_property_setters.append(
        property_setters.EOLStyleFromMimeTypeSetter())

  if ctx.no_default_eol:
    ctx.svn_property_setters.append(
        property_setters.DefaultEOLStyleSetter(None))
  else:
    ctx.svn_property_setters.append(
        property_setters.DefaultEOLStyleSetter('native'))

  if not ctx.keywords_off:
    ctx.svn_property_setters.append(
        property_setters.KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE))

  # Make sure the tmp directory exists. Note that we don't check if
  # it's empty -- we want to be able to use, for example, "." to hold
  # tempfiles. But if we *did* want to check if it were empty, we'd do
  # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
  if not os.path.exists(ctx.tmpdir):
    os.mkdir(ctx.tmpdir)
  elif not os.path.isdir(ctx.tmpdir):
    raise FatalError(
        "cvs2svn tried to use '%s' for temporary files, but that path\n"
        " exists and is not a directory. Please make it be a directory,\n"
        " or specify some other directory for temporary files."
        % (ctx.tmpdir,))

  # But do lock the tmpdir, to avoid process clash.
  try:
    os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
  except OSError, e:
    if e.errno == errno.EACCES:
      raise FatalError("Permission denied:"
                       + " No write access to directory '%s'." % ctx.tmpdir)
    if e.errno == errno.EEXIST:
      raise FatalError(
          "cvs2svn is using directory '%s' for temporary files, but\n"
          " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
          " cvs2svn process is currently using '%s' as its temporary\n"
          " workspace. If you are certain that is not the case,\n"
          " then remove the '%s/cvs2svn.lock' subdirectory."
          % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
    raise
  try:
    if profiling:
      import hotshot
      prof = hotshot.Profile('cvs2svn.hotshot')
      prof.runcall(convert, start_pass, end_pass)
      prof.close()
    else:
      convert(start_pass, end_pass)
  finally:
    # Always release the tmpdir lock, even if the conversion failed.
    try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
    except: pass
if __name__ == '__main__':
  try:
    main()
  except FatalException, e:
    # Fatal errors are reported on stderr and produce a nonzero exit.
    sys.stderr.write(str(e))
    sys.exit(1)