2 # (Be in -*- python -*- mode.)
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
# NOTE(review): this extract is line-fragmented; the leading numbers on each
# line are residue of the original line numbering, not program tokens.
# VERSION is derived from the svn $LastChangedRevision$ keyword expansion.
20 VERSION
= 'r' + "$LastChangedRevision$"[22:-2]
# __future__ import of generators (needed on Python 2.2 for 'yield').
22 from __future__
import generators
36 # Try to get access to a bunch of encodings for use with --encoding.
37 # See http://cjkpython.i18n.org/ for details.
42 import cvs2svn_rcsparse
44 from cvs2svn_lib
.boolean
import *
46 from cvs2svn_lib
import config
48 from cvs2svn_lib
import common
49 from cvs2svn_lib
.common
import \
55 from cvs2svn_lib
.log
import Log
57 from cvs2svn_lib
.process
import \
59 CommandFailedException
, \
62 from cvs2svn_lib
import database
63 from cvs2svn_lib
.context
import Ctx
64 from cvs2svn_lib
.artifact_manager
import artifact_manager
65 from cvs2svn_lib
.stats_keeper
import StatsKeeper
66 from cvs2svn_lib
import key_generator
67 from cvs2svn_lib
import cvs_revision
68 from cvs2svn_lib
import cvs_repository
69 from cvs2svn_lib
import property_setters
70 from cvs2svn_lib
.svn_revision_range
import SVNRevisionRange
71 from cvs2svn_lib
.tags_database
import TagsDatabase
72 from cvs2svn_lib
.cvs_revision_database
import CVSRevisionDatabase
73 from cvs2svn_lib
.openings_closings
import \
74 OpeningsClosingsMap
, \
76 from cvs2svn_lib
.fill_source
import FillSource
77 from cvs2svn_lib
.last_symbolic_name_database
import LastSymbolicNameDatabase
78 from cvs2svn_lib
.symbol_database
import SymbolDatabase
79 from cvs2svn_lib
.project
import Project
80 from cvs2svn_lib
import collect_data
81 from cvs2svn_lib
.symbolings_reader
import SymbolingsReader
82 from cvs2svn_lib
.svn_commit_item
import SVNCommitItem
83 from cvs2svn_lib
.svn_commit
import SVNCommit
84 from cvs2svn_lib
.svn_repository_mirror
import \
85 SVNRepositoryMirror
, \
86 SVNRepositoryMirrorDelegate
87 from cvs2svn_lib
.dumpfile_delegate
import DumpfileDelegate
88 from cvs2svn_lib
.repository_delegate
import RepositoryDelegate
89 from cvs2svn_lib
.stdout_delegate
import StdoutDelegate
90 from cvs2svn_lib
.persistence_manager
import PersistenceManager
91 from cvs2svn_lib
.pass_manager
import PassManager
94 # Make sure this Python is recent enough.
95 if sys
.hexversion
< 0x02020000:
96 sys
.stderr
.write("'%s: Python 2.2 or higher required, "
97 "see www.python.org.\n" % error_prefix
)
# NOTE(review): original lines 98-100 are absent from this extract --
# presumably the sys.exit() call that terminates on an old interpreter;
# confirm against the full file.
# Index one past the end of the hex digest in a revs-file line:
# 9 header chars plus two hex chars per digest byte.
101 DIGEST_END_IDX
= 9 + (sha
.digestsize
* 2)
# Matches any control character that Subversion cannot store in a path.
104 ctrl_characters_regexp
= re
.compile('[\\\x00-\\\x1f\\\x7f]')
106 def verify_filename_legal(filename
):
107 """Verify that FILENAME does not include any control characters. If
108 it does, raise a FatalError."""
110 m
= ctrl_characters_regexp
.search(filename
)
# NOTE(review): original lines 111-112 are missing from this extract --
# presumably the 'if m:' guard and the 'raise FatalError(' opener that the
# message fragments below belong to; confirm against the full file.
113 "Character %r in filename %r is not supported by subversion."
114 % (m
.group(), filename
,))
117 def sort_file(infilename
, outfilename
):
118 """Sort file INFILENAME, storing the results to OUTFILENAME."""
120 # GNU sort will sort our dates differently (incorrectly!) if our
121 # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
# Save the caller's LC_ALL so it can be restored afterwards.
123 lc_all_tmp
= os
.environ
.get('LC_ALL', None)
# NOTE(review): the 'try:' line that should protect the restore below is
# missing from this extract (original line ~125); confirm upstream.
124 os
.environ
['LC_ALL'] = 'C'
126 # The -T option to sort has a nice side effect. The Win32 sort is
127 # case insensitive and cannot be used, and since it does not
128 # understand the -T option and dies if we try to use it, there is
129 # no risk that we use that sort by accident.
130 run_command('sort -T %s %s > %s'
131 % (Ctx().tmpdir
, infilename
, outfilename
))
# NOTE(review): the 'finally:' and 'else:' lines of the LC_ALL restoration
# are missing from this extract; the two branches below restore the
# original environment (delete if it was unset, otherwise put it back).
133 if lc_all_tmp
is None:
134 del os
.environ
['LC_ALL']
136 os
.environ
['LC_ALL'] = lc_all_tmp
140 """Each instance of this class contains a number of CVS Revisions
141 that correspond to one or more Subversion Commits. After all CVS
142 Revisions are added to the grouping, calling process_revisions will
143 generate a Subversion Commit (or Commits) for the set of CVS
144 Revisions in the grouping."""
# NOTE(review): constructor of CVSCommit (the 'class CVSCommit:' line and
# its self.digest/author/log, self.files, self.changes/self.deletes and
# t_min/t_max assignments are absent from this extract -- confirm upstream).
146 def __init__(self
, digest
, author
, log
):
151 # Symbolic names for which the last source revision has already
152 # been seen and for which the CVSRevisionAggregator has already
153 # generated a fill SVNCommit. See self.process_revisions().
154 self
.done_symbols
= [ ]
157 # Lists of CVSRevisions
161 # Start out with a t_min higher than any incoming time T, and a
162 # t_max lower than any incoming T. This way the first T will
163 # push t_min down to T, and t_max up to T, naturally (without any
164 # special-casing), and successive times will then ratchet them
165 # outward as appropriate.
169 # This will be set to the SVNCommit that occurs in self._commit.
170 self
.motivating_commit
= None
172 # This is a list of all non-primary commits motivated by the main
173 # commit. We gather these so that we can set their dates to the
174 # same date as the primary commit.
175 self
.secondary_commits
= [ ]
177 # State for handling default branches.
179 # Here is a tempting, but ultimately nugatory, bit of logic, which
180 # I share with you so you may appreciate the less attractive, but
181 # refreshingly non-nugatory, logic which follows it:
183 # If some of the commits in this txn happened on a non-trunk
184 # default branch, then those files will have to be copied into
185 # trunk manually after being changed on the branch (because the
186 # RCS "default branch" appears as head, i.e., trunk, in practice).
187 # As long as those copies don't overwrite any trunk paths that
188 # were also changed in this commit, then we can do the copies in
189 # the same revision, because they won't cover changes that don't
190 # appear anywhere/anywhen else. However, if some of the trunk dst
191 # paths *did* change in this commit, then immediately copying the
192 # branch changes would lose those trunk mods forever. So in this
193 # case, we need to do at least that copy in its own revision. And
194 # for simplicity's sake, if we're creating the new revision for
195 # even one file, then we just do all such copies together in the
198 # Doesn't that sound nice?
200 # Unfortunately, Subversion doesn't support copies with sources
201 # in the current txn. All copies must be based in committed
202 # revisions. Therefore, we generate the above-described new
203 # revision unconditionally.
205 # This is a list of c_revs, and a c_rev is appended for each
206 # default branch commit that will need to be copied to trunk (or
207 # deleted from trunk) in some generated revision following the
208 # "regular" revision.
209 self
.default_branch_cvs_revisions
= [ ]
def __cmp__(self, other):
    """Python 2 ordering hook: order CVSCommits chronologically.

    Commits are compared by t_max first; ties are broken by t_min,
    then digest, and finally by object id so that no two distinct
    instances ever compare equal.
    """
    return (cmp(self.t_max, other.t_max)
            or cmp(self.t_min, other.t_min)
            or cmp(self.digest, other.digest)
            or cmp(id(self), id(other)))
def has_file(self, fname):
    """Return true iff FNAME is one of the files touched by this commit."""
    # Python 2 idiom; self.files maps filename -> 1.
    return self.files.has_key(fname)
# NOTE(review): body of 'def revisions(self):' -- the def line (original
# line 225) is absent from this extract.  Returns all CVSRevisions in this
# commit: the changed ones followed by the deleted ones.
226 return self
.changes
+ self
.deletes
228 def opens_symbolic_name(self
, name
):
229 """Return True if any CVSRevision in this commit is on a tag or a
230 branch or is the origin of a tag or branch."""
231 # Delegate the per-revision test to CVSRevision.opens_symbolic_name.
232 for c_rev
in self
.revisions():
233 if c_rev
.opens_symbolic_name(name
):
# NOTE(review): the 'return 1' / 'return 0' lines that terminate this loop
# (original lines ~234-235) are absent from this extract; confirm upstream.
237 def add_revision(self
, c_rev
):
238 # Record the time range of this commit.
240 # ### ISSUE: It's possible, though unlikely, that the time range
241 # of a commit could get gradually expanded to be arbitrarily
242 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
243 # problem, and anyway deciding where to break it up would be a
244 # judgement call. For now, we just print a warning in commit() if
# Ratchet t_min down and t_max up to cover this revision's timestamp.
246 if c_rev
.timestamp
< self
.t_min
:
247 self
.t_min
= c_rev
.timestamp
248 if c_rev
.timestamp
> self
.t_max
:
249 self
.t_max
= c_rev
.timestamp
# File deletions and content changes are kept in separate lists.
251 if c_rev
.op
== common
.OP_DELETE
:
252 self
.deletes
.append(c_rev
)
# NOTE(review): the 'else:' line (original 253) separating the delete and
# change branches is absent from this extract; confirm upstream.
254 # OP_CHANGE or OP_ADD
255 self
.changes
.append(c_rev
)
# Record the filename so has_file() can detect same-file collisions.
257 self
.files
[c_rev
.fname
] = 1
259 def _pre_commit(self
):
260 """Generates any SVNCommits that must exist before the main commit."""
262 # There may be multiple c_revs in this commit that would cause
263 # branch B to be filled, but we only want to fill B once. On the
264 # other hand, there might be multiple branches committed on in
265 # this commit. Whatever the case, we should count exactly one
266 # commit per branch, because we only fill a branch once per
267 # CVSCommit. This list tracks which branches we've already
269 accounted_for_sym_names
= [ ]
271 def fill_needed(c_rev
, pm
):
272 """Return 1 if this is the first commit on a new branch (for
273 this file) and we need to fill the branch; else return 0
274 (meaning that some other file's first commit on the branch has
275 already done the fill for us).
277 If C_REV.op is OP_ADD, only return 1 if the branch that this
278 commit is on has no last filled revision.
280 PM is a PersistenceManager to query."""
282 # Different '.' counts indicate that c_rev is now on a different
283 # line of development (and may need a fill)
284 if c_rev
.rev
.count('.') != c_rev
.prev_rev
.rev
.count('.'):
285 svn_revnum
= pm
.get_svn_revnum(c_rev
.prev_rev
.unique_key())
286 # It should be the case that when we have a file F that
287 # is added on branch B (thus, F on trunk is in state
288 # 'dead'), we generate an SVNCommit to fill B iff the branch
289 # has never been filled before.
291 # If this c_rev.op == OP_ADD, *and* the branch has never
292 # been filled before, then fill it now. Otherwise, no need to
# NOTE(review): the 'return 1' / 'return 0' lines following each of the
# three op-specific tests below (original lines 296, 299, 302-303) are
# absent from this extract; confirm against the full file.
294 if c_rev
.op
== common
.OP_ADD
:
295 if pm
.last_filled
.get(c_rev
.branch_name
, None) is None:
297 elif c_rev
.op
== common
.OP_CHANGE
:
298 if svn_revnum
> pm
.last_filled
.get(c_rev
.branch_name
, 0):
300 elif c_rev
.op
== common
.OP_DELETE
:
301 if pm
.last_filled
.get(c_rev
.branch_name
, None) is None:
# Scan every revision in this commit; generate at most one fill commit
# per branch name, skipping branches already filled in this CVSCommit
# (accounted_for_sym_names) or finally closed (self.done_symbols).
305 for c_rev
in self
.changes
+ self
.deletes
:
306 # If a commit is on a branch, we must ensure that the branch
307 # path being committed exists (in HEAD of the Subversion
308 # repository). If it doesn't exist, we will need to fill the
309 # branch. After the fill, the path on which we're committing
311 if c_rev
.branch_name \
312 and c_rev
.branch_name
not in accounted_for_sym_names \
313 and c_rev
.branch_name
not in self
.done_symbols \
314 and fill_needed(c_rev
, Ctx()._persistence
_manager
):
315 svn_commit
= SVNCommit("pre-commit symbolic name '%s'"
317 svn_commit
.set_symbolic_name(c_rev
.branch_name
)
318 self
.secondary_commits
.append(svn_commit
)
319 accounted_for_sym_names
.append(c_rev
.branch_name
)
# NOTE(review): this is the body of 'def _commit(self):' -- the def line
# (original line ~321) and several interior lines (e.g. the 'continue'
# statements and the log_msg assignment opener) are absent from this
# extract; confirm against the full file.
322 """Generates the primary SVNCommit that corresponds to this
325 # Generate an SVNCommit unconditionally. Even if the only change
326 # in this CVSCommit is a deletion of an already-deleted file (that
327 # is, a CVS revision in state 'dead' whose predecessor was also in
328 # state 'dead'), the conversion will still generate a Subversion
329 # revision containing the log message for the second dead
330 # revision, because we don't want to lose that information.
331 svn_commit
= SVNCommit("commit")
332 self
.motivating_commit
= svn_commit
334 for c_rev
in self
.changes
:
335 svn_commit
.add_revision(c_rev
)
336 # Only make a change if we need to:
337 if c_rev
.rev
== "1.1.1.1" and not c_rev
.deltatext_exists
:
338 # When 1.1.1.1 has an empty deltatext, the explanation is
339 # almost always that we're looking at an imported file whose
340 # 1.1 and 1.1.1.1 are identical. On such imports, CVS creates
341 # an RCS file where 1.1 has the content, and 1.1.1.1 has an
342 # empty deltatext, i.e, the same content as 1.1. There's no
343 # reason to reflect this non-change in the repository, so we
344 # want to do nothing in this case. (If we were really
345 # paranoid, we could make sure 1.1's log message is the
346 # CVS-generated "Initial revision\n", but I think the
347 # conditions above are strict enough.)
350 if c_rev
.is_default_branch_revision():
351 self
.default_branch_cvs_revisions
.append(c_rev
)
353 for c_rev
in self
.deletes
:
354 # When a file is added on a branch, CVS not only adds the file
355 # on the branch, but generates a trunk revision (typically
356 # 1.1) for that file in state 'dead'. We only want to add
357 # this revision if the log message is not the standard cvs
358 # fabricated log message.
359 if c_rev
.prev_rev
is None:
360 # c_rev.branches may be empty if the originating branch
362 if not c_rev
.branches
:
364 cvs_generated_msg
= ('file %s was initially added on branch %s.\n'
368 Ctx()._persistence
_manager
.svn_commit_metadata
[c_rev
.digest
]
369 if log_msg
== cvs_generated_msg
:
372 svn_commit
.add_revision(c_rev
)
373 if c_rev
.is_default_branch_revision():
374 self
.default_branch_cvs_revisions
.append(c_rev
)
376 # There is a slight chance that we didn't actually register any
377 # CVSRevisions with our SVNCommit (see loop over self.deletes
378 # above), so if we have no CVSRevisions, we don't flush the
379 # svn_commit to disk and roll back our revnum.
380 if len(svn_commit
.cvs_revs
) > 0:
381 svn_commit
.set_date(self
.t_max
)
384 # We will not be flushing this SVNCommit, so rollback the
385 # SVNCommit revision counter.
386 SVNCommit
.revnum
-= 1
# Unless trunk-only, tell the symbolings logger about every revision
# that landed in this Subversion revision.
388 if not Ctx().trunk_only
:
389 for c_rev
in self
.revisions():
390 Ctx()._symbolings
_logger
.log_revision(c_rev
, svn_commit
.revnum
)
def _post_commit(self):
    """Generate any SVNCommits that we can perform now that _commit
    has happened.  That is, handle non-trunk default branches.
    Sometimes an RCS file has a non-trunk default branch, so a commit
    on that default branch would be visible in a default CVS checkout
    of HEAD.  If we don't copy that commit over to Subversion's trunk,
    then there will be no Subversion tree which corresponds to that
    CVS checkout.  Of course, in order to copy the path over, we may
    first need to delete the existing trunk there."""

    # Nothing to do unless some revisions landed on a default branch.
    if not self.default_branch_cvs_revisions:
      return
    # One secondary SVNCommit covers all default-branch c_revs; it is
    # motivated by (and dated like) the primary commit.
    svn_commit = SVNCommit("post-commit default branch(es)")
    svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
    for c_rev in self.default_branch_cvs_revisions:
      svn_commit.add_revision(c_rev)
      Ctx()._symbolings_logger.log_default_branch_closing(
          c_rev, svn_commit.revnum)
    self.secondary_commits.append(svn_commit)
413 def process_revisions(self
, done_symbols
):
414 """Process all the CVSRevisions that this instance has, creating
415 one or more SVNCommits in the process. Generate fill SVNCommits
416 only for symbols not in DONE_SYMBOLS (avoids unnecessary
419 Return the primary SVNCommit that corresponds to this CVSCommit.
420 The returned SVNCommit is the commit that motivated any other
421 SVNCommits generated in this CVSCommit."""
423 self
.done_symbols
= done_symbols
# Duration of this grouping, inclusive of both endpoints.
424 seconds
= self
.t_max
- self
.t_min
+ 1
426 Log().write(Log
.VERBOSE
, '-' * 60)
427 Log().write(Log
.VERBOSE
, 'CVS Revision grouping:')
# NOTE(review): the 'if seconds == 1:' / 'else:' lines selecting between
# the one-second and multi-second log formats (original lines 428/431)
# are absent from this extract; confirm upstream.
429 Log().write(Log
.VERBOSE
, ' Start time: %s (duration: 1 second)'
430 % time
.ctime(self
.t_max
))
432 Log().write(Log
.VERBOSE
, ' Start time: %s' % time
.ctime(self
.t_min
))
433 Log().write(Log
.VERBOSE
, ' End time: %s (duration: %d seconds)'
434 % (time
.ctime(self
.t_max
), seconds
))
436 if seconds
> config
.COMMIT_THRESHOLD
+ 1:
437 Log().write(Log
.WARN
, '%s: grouping spans more than %d seconds'
438 % (warning_prefix
, config
.COMMIT_THRESHOLD
))
# NOTE(review): the calls performing the actual work (self._commit(),
# and for the non-trunk-only path self._pre_commit()/_commit()/
# _post_commit(), original lines ~441 and ~443-447) are absent from this
# extract; confirm upstream.
440 if Ctx().trunk_only
: # Only do the primary commit if we're trunk-only
442 return self
.motivating_commit
# Secondary commits inherit the primary commit's date.
448 for svn_commit
in self
.secondary_commits
:
449 svn_commit
.set_date(self
.motivating_commit
.get_date())
452 return self
.motivating_commit
455 class CVSRevisionAggregator
:
456 """This class groups CVSRevisions into CVSCommits that represent
457 at least one SVNCommit."""
# NOTE(review): the 'def __init__(self):' line (original ~458) is absent
# from this extract; the assignments below are its body.
460 self
.metadata_db
= database
.Database(
461 artifact_manager
.get_temp_file(config
.METADATA_DB
),
462 database
.DB_OPEN_READ
)
463 if not Ctx().trunk_only
:
464 self
.last_revs_db
= database
.Database(
465 artifact_manager
.get_temp_file(config
.SYMBOL_LAST_CVS_REVS_DB
),
466 database
.DB_OPEN_READ
)
468 # A map { key : CVSCommit } of CVS commits currently being
469 # accumulated. If the CVSCommit is still open to further
470 # CVSRevisions, then key is CVSRevision.digest. If not (because
471 # an inbound commit wanted to affect a file that was already
472 # within the CVSCommit), then key is CVSRevision.digest plus some
473 # number of appended '-'.
474 self
.cvs_commits
= {}
476 # List of ready commits.
477 self
.ready_queue
= [ ]
479 # A map { symbol : None } of symbolic names for which the last
480 # source CVSRevision has already been processed but which haven't
482 self
.pending_symbols
= {}
484 # A list of closed symbols. That is, we've already encountered
485 # the last CVSRevision that is a source for that symbol, the final
486 # fill for this symbol has been done, and we never need to fill it
488 self
.done_symbols
= [ ]
490 # This variable holds the most recently created primary svn_commit
491 # object. CVSRevisionAggregator maintains this variable merely
492 # for its date, so that it can set dates for the SVNCommits
493 # created in self._attempt_to_commit_symbols().
494 self
.latest_primary_svn_commit
= None
# Install the shared conversion singletons on the global context.
496 Ctx()._symbolings
_logger
= SymbolingsLogger()
497 Ctx()._persistence
_manager
= PersistenceManager(database
.DB_OPEN_NEW
)
498 Ctx()._default
_branches
_db
= database
.SDatabase(
499 artifact_manager
.get_temp_file(config
.DEFAULT_BRANCHES_DB
),
500 database
.DB_OPEN_READ
)
502 def _extract_ready_commits(self
, timestamp
=None):
503 """Extract any active commits that expire by TIMESTAMP from
504 self.cvs_commits and append them to self.ready_queue. If
505 TIMESTAMP is not specified, then extract all commits."""
507 for digest_key
, cvs_commit
in self
.cvs_commits
.items():
508 if timestamp
is None \
509 or cvs_commit
.t_max
+ config
.COMMIT_THRESHOLD
< timestamp
:
510 self
.ready_queue
.append(cvs_commit
)
511 del self
.cvs_commits
[digest_key
]
513 def _commit_ready_commits(self
, timestamp
=None):
514 """Sort the commits from self.ready_queue by time, then process
515 them in order. If TIMESTAMP is specified, only process commits
516 that have timestamp previous to TIMESTAMP."""
518 self
.ready_queue
.sort()
519 while self
.ready_queue
and \
520 (timestamp
is None or self
.ready_queue
[0].t_max
< timestamp
):
521 cvs_commit
= self
.ready_queue
.pop(0)
522 self
.latest_primary_svn_commit
= \
523 cvs_commit
.process_revisions(self
.done_symbols
)
524 self
._attempt
_to
_commit
_symbols
()
526 def process_revision(self
, c_rev
):
527 # Each time we read a new line, scan the accumulating commits to
528 # see if any are ready for processing.
529 self
._extract
_ready
_commits
(c_rev
.timestamp
)
531 for digest_key
, cvs_commit
in self
.cvs_commits
.items():
532 # If the inbound commit is on the same file as a pending commit,
533 # close the pending commit to further changes. Don't flush it though,
534 # as there may be other pending commits dated before this one.
535 # ### ISSUE: the has_file() check below is not optimal.
536 # It does fix the dataloss bug where revisions would get lost
537 # if checked in too quickly, but it can also break apart the
538 # commits. The correct fix would require tracking the dependencies
539 # between change sets and committing them in proper order.
540 if cvs_commit
.has_file(c_rev
.fname
):
541 unused_id
= digest_key
+ '-'
542 # Find a string that does is not already a key in
543 # the self.cvs_commits dict
# NOTE(review): the loop body extending unused_id (original line ~545)
# is absent from this extract; confirm upstream.
544 while self
.cvs_commits
.has_key(unused_id
):
546 self
.cvs_commits
[unused_id
] = cvs_commit
547 del self
.cvs_commits
[digest_key
]
549 # Add this item into the set of still-available commits.
550 if self
.cvs_commits
.has_key(c_rev
.digest
):
551 cvs_commit
= self
.cvs_commits
[c_rev
.digest
]
# NOTE(review): the 'else:' line (original ~552) introducing the
# new-commit branch below is absent from this extract; confirm upstream.
553 author
, log
= self
.metadata_db
[c_rev
.digest
]
554 cvs_commit
= CVSCommit(c_rev
.digest
, author
, log
)
555 self
.cvs_commits
[c_rev
.digest
] = cvs_commit
556 cvs_commit
.add_revision(c_rev
)
558 # Any elements in self.ready_queue at this point need to be
559 # processed, because this latest rev couldn't possibly be part of
561 self
._commit
_ready
_commits
()
563 self
._add
_pending
_symbols
(c_rev
)
# NOTE(review): the 'def flush(self):' line (original ~565) is absent
# from this extract; the docstring and body below belong to it.
566 """Commit anything left in self.cvs_commits. Then inform the
567 SymbolingsLogger that all commits are done."""
569 self
._extract
_ready
_commits
()
570 self
._commit
_ready
_commits
()
572 if not Ctx().trunk_only
:
573 Ctx()._symbolings
_logger
.close()
575 def _add_pending_symbols(self
, c_rev
):
576 """Add to self.pending_symbols any symbols from C_REV for which
577 C_REV is the last CVSRevision.
579 If we're not doing a trunk-only conversion, get the symbolic names
580 that this c_rev is the last *source* CVSRevision for and add them
581 to those left over from previous passes through the aggregator."""
583 if not Ctx().trunk_only
:
584 for sym
in self
.last_revs_db
.get(c_rev
.unique_key(), []):
585 self
.pending_symbols
[sym
] = None
587 def _attempt_to_commit_symbols(self
):
588 """Generate one SVNCommit for each symbol in self.pending_symbols
589 that doesn't have an opening CVSRevision in either self.ready_queue
590 or self.cvs_commits.values()."""
592 # Make a list of all symbols from self.pending_symbols that do not
593 # have *source* CVSRevisions in the pending commit queues
594 # (self.cvs_commits or self.ready_queue):
595 closeable_symbols
= []
596 pending_commits
= self
.cvs_commits
.values() + self
.ready_queue
# NOTE(review): the 'break' / 'else:' lines of the inner for-else
# (original lines ~600-601) are absent from this extract; the append
# below presumably runs only when no pending commit opens SYM.
597 for sym
in self
.pending_symbols
:
598 for cvs_commit
in pending_commits
:
599 if cvs_commit
.opens_symbolic_name(sym
):
602 closeable_symbols
.append(sym
)
604 # Sort the closeable symbols so that we will always process the
605 # symbols in the same order, regardless of the order in which the
606 # dict hashing algorithm hands them back to us. We do this so
607 # that our tests will get the same results on all platforms.
608 closeable_symbols
.sort()
609 for sym
in closeable_symbols
:
610 svn_commit
= SVNCommit("closing tag/branch '%s'" % sym
)
611 svn_commit
.set_symbolic_name(sym
)
612 svn_commit
.set_date(self
.latest_primary_svn_commit
.get_date())
614 self
.done_symbols
.append(sym
)
615 del self
.pending_symbols
[sym
]
# NOTE(review): docstring of 'class Pass:' plus its __init__ body; the
# class line and the 'def __init__(self):' line are absent from this
# extract -- confirm upstream.
619 """Base class for one step of the conversion."""
622 # By default, use the pass object's class name as the pass name:
623 self
.name
= self
.__class
__.__name
__
def register_artifacts(self):
    """Register artifacts (created and needed) in artifact_manager.

    Abstract hook: every concrete pass must override this to declare
    the temporary files it produces and consumes."""
    raise NotImplementedError
def _register_temp_file(self, basename):
    """Helper method; for brevity only.

    Record with artifact_manager that this pass creates the temp file
    named BASENAME."""
    artifact_manager.register_temp_file(basename, self)
def _register_temp_file_needed(self, basename):
    """Helper method; for brevity only.

    Record with artifact_manager that this pass consumes the temp file
    named BASENAME."""
    artifact_manager.register_temp_file_needed(basename, self)
# NOTE(review): body of the abstract 'def run(self):' hook -- the def line
# (original ~640) is absent from this extract.
641 """Carry out this step of the conversion."""
643 raise NotImplementedError
646 class CollectRevsPass(Pass
):
647 """This pass was formerly known as pass1."""
# Declare the six temp files this pass produces.
649 def register_artifacts(self
):
650 self
._register
_temp
_file
(config
.TAGS_LIST
)
651 self
._register
_temp
_file
(config
.BRANCHES_LIST
)
652 self
._register
_temp
_file
(config
.REVS_DATAFILE
)
653 self
._register
_temp
_file
(config
.RESYNC_DATAFILE
)
654 self
._register
_temp
_file
(config
.DEFAULT_BRANCHES_DB
)
655 self
._register
_temp
_file
(config
.METADATA_DB
)
# NOTE(review): the 'def run(self, stats_keeper):' line (original ~657)
# is absent from this extract; everything below is its body.
658 OS_SEP_PLUS_ATTIC
= os
.sep
+ 'Attic'
659 Log().write(Log
.QUIET
, "Examining all CVS ',v' files...")
660 cd
= collect_data
.CollectData()
# Per-directory callback for os.path.walk; parses every ',v' file.
# NOTE(review): the 'for fname in files:' line (original ~663-664) is
# absent from this extract; confirm upstream.
662 def visit_file(baton
, dirname
, files
):
665 verify_filename_legal(fname
)
666 if not fname
.endswith(',v'):
668 cd
.found_valid_file
= 1
669 pathname
= os
.path
.join(dirname
, fname
)
670 if dirname
.endswith(OS_SEP_PLUS_ATTIC
):
671 # drop the 'Attic' portion from the pathname for the canonical name.
672 fdc
= collect_data
.FileDataCollector(
673 cd
, os
.path
.join(dirname
[:-6], fname
), pathname
)
675 # If this file also exists in the attic, it's a fatal error
676 attic_path
= os
.path
.join(dirname
, 'Attic', fname
)
677 if os
.path
.exists(attic_path
):
678 err
= "%s: A CVS repository cannot contain both %s and %s" \
679 % (error_prefix
, pathname
, attic_path
)
680 sys
.stderr
.write(err
+ '\n')
681 cd
.fatal_errors
.append(err
)
# Non-Attic case: canonical name and physical path coincide.
682 fdc
= collect_data
.FileDataCollector(cd
, pathname
, pathname
)
683 Log().write(Log
.NORMAL
, pathname
)
# NOTE(review): the 'try:' line guarding the parse (original ~684) is
# absent from this extract; the except clauses below belong to it.
685 cvs2svn_rcsparse
.parse(open(pathname
, 'rb'), fdc
)
686 except (cvs2svn_rcsparse
.common
.RCSParseError
, ValueError,
688 err
= "%s: '%s' is not a valid ,v file" \
689 % (error_prefix
, pathname
)
690 sys
.stderr
.write(err
+ '\n')
691 cd
.fatal_errors
.append(err
)
693 Log().write(Log
.WARN
,
694 "Exception occurred while parsing %s" % pathname
)
# Walk the whole CVS repository, invoking visit_file per directory.
697 os
.path
.walk(Ctx().project
.project_cvs_repos_path
, visit_file
, cd
)
698 Log().write(Log
.VERBOSE
, 'Processed', cd
.num_files
, 'files')
702 if len(cd
.fatal_errors
) > 0:
703 raise FatalException("Pass 1 complete.\n"
706 + "\n".join(cd
.fatal_errors
) + "\n"
707 + "Exited due to fatal error(s).\n")
709 if cd
.found_valid_file
is None:
710 raise FatalException(
712 "No RCS files found in your CVS Repository!\n"
713 "Are you absolutely certain you are pointing cvs2svn\n"
714 "at a CVS repository?\n"
716 "Exited due to fatal error(s).\n")
718 StatsKeeper().reset_c_rev_info()
719 StatsKeeper().archive()
720 Log().write(Log
.QUIET
, "Done")
723 class ResyncRevsPass(Pass
):
724 """Clean up the revision information.
726 This pass was formerly known as pass2."""
def register_artifacts(self):
    """Declare the temp files this pass creates and the ones it needs.

    Creates the tags DB, the cleaned revs datafile and the tweaked
    timestamps DB; consumes the four data files written by pass 1."""
    # Artifacts produced by this pass.
    self._register_temp_file(config.TAGS_DB)
    self._register_temp_file(config.CLEAN_REVS_DATAFILE)
    self._register_temp_file(config.TWEAKED_TIMESTAMPS_DB)
    # Artifacts consumed from the previous pass.
    self._register_temp_file_needed(config.TAGS_LIST)
    self._register_temp_file_needed(config.BRANCHES_LIST)
    self._register_temp_file_needed(config.REVS_DATAFILE)
    self._register_temp_file_needed(config.RESYNC_DATAFILE)
738 symbol_db
= SymbolDatabase()
741 # Convert the list of regexps to a list of strings
742 excludes
= symbol_db
.find_excluded_symbols(Ctx().excludes
)
746 Log().write(Log
.QUIET
, "Checking for blocked exclusions...")
747 blocked_excludes
= symbol_db
.find_blocked_excludes(excludes
)
749 for branch
, blockers
in blocked_excludes
.items():
750 sys
.stderr
.write(error_prefix
+ ": The branch '%s' cannot be "
751 "excluded because the following symbols depend "
752 "on it:\n" % (branch
))
753 for blocker
in blockers
:
754 sys
.stderr
.write(" '%s'\n" % (blocker
))
755 sys
.stderr
.write("\n")
758 Log().write(Log
.QUIET
, "Checking for forced tags with commits...")
759 invalid_forced_tags
= [ ]
760 for forced_tag
in Ctx().forced_tags
:
761 if excludes
.has_key(forced_tag
):
763 if symbol_db
.branch_has_commit(forced_tag
):
764 invalid_forced_tags
.append(forced_tag
)
765 if invalid_forced_tags
:
766 sys
.stderr
.write(error_prefix
+ ": The following branches cannot be "
767 "forced to be tags because they have commits:\n")
768 for tag
in invalid_forced_tags
:
769 sys
.stderr
.write(" '%s'\n" % (tag
))
770 sys
.stderr
.write("\n")
773 Log().write(Log
.QUIET
, "Checking for tag/branch mismatches...")
774 mismatches
= symbol_db
.find_mismatches(excludes
)
775 def is_not_forced(mismatch
):
777 return not (name
in Ctx().forced_tags
or name
in Ctx().forced_branches
)
778 mismatches
= filter(is_not_forced
, mismatches
)
780 sys
.stderr
.write(error_prefix
+ ": The following symbols are tags "
781 "in some files and branches in others.\nUse "
782 "--force-tag, --force-branch and/or --exclude to "
783 "resolve the symbols.\n")
784 for name
, tag_count
, branch_count
, commit_count
in mismatches
:
785 sys
.stderr
.write(" '%s' is a tag in %d files, a branch in "
786 "%d files and has commits in %d files.\n"
787 % (name
, tag_count
, branch_count
, commit_count
))
790 # Bail out now if we found errors
794 # Create the tags database
795 tags_db
= TagsDatabase(database
.DB_OPEN_NEW
)
796 for tag
in symbol_db
.tags
:
797 if tag
not in Ctx().forced_branches
:
799 for tag
in Ctx().forced_tags
:
802 Log().write(Log
.QUIET
, "Re-synchronizing CVS revision timestamps...")
804 # We may have recorded some changes in revisions' timestamp. We need to
805 # scan for any other files which may have had the same log message and
806 # occurred at "the same time" and change their timestamps, too.
808 # read the resync data file
809 def read_resync(fname
):
810 """Read the .resync file into memory."""
812 ### note that we assume that we can hold the entire resync file in
813 ### memory. really large repositories with whacky timestamps could
814 ### bust this assumption. should that ever happen, then it is possible
815 ### to split the resync file into pieces and make multiple passes,
816 ### using each piece.
819 # A digest maps to a sequence of lists which specify a lower and upper
820 # time bound for matching up the commit. We keep a sequence of these
821 # because a number of checkins with the same log message (e.g. an empty
822 # log message) could need to be remapped. We also make them a list
823 # because we will dynamically expand the lower/upper bound as we find
824 # commits that fall into a particular msg and time range.
826 # resync == digest -> [[old_time_lower, old_time_upper, new_time], ...]
830 for line
in fileinput
.FileInput(fname
):
831 t1
= int(line
[:8], 16)
832 digest
= line
[9:DIGEST_END_IDX
]
833 t2
= int(line
[DIGEST_END_IDX
+1:], 16)
834 t1_l
= t1
- config
.COMMIT_THRESHOLD
/2
835 t1_u
= t1
+ config
.COMMIT_THRESHOLD
/2
836 resync
.setdefault(digest
, []).append([t1_l
, t1_u
, t2
])
838 # For each digest, sort the resync items in it in increasing order,
839 # based on the lower time bound.
840 for val
in resync
.values():
845 resync
= read_resync(
846 artifact_manager
.get_temp_file(config
.RESYNC_DATAFILE
))
848 output
= open(artifact_manager
.get_temp_file(config
.CLEAN_REVS_DATAFILE
),
851 tweaked_timestamps_db
= database
.Database(
852 artifact_manager
.get_temp_file(config
.TWEAKED_TIMESTAMPS_DB
),
853 database
.DB_OPEN_NEW
)
855 # process the revisions file, looking for items to clean up
856 for line
in fileinput
.FileInput(
857 artifact_manager
.get_temp_file(config
.REVS_DATAFILE
)):
858 c_rev
= cvs_revision
.parse_cvs_revision(Ctx(), line
[:-1])
860 # Skip this entire revision if it's on an excluded branch
861 if excludes
.has_key(c_rev
.branch_name
):
865 if c_rev
.prev_rev
is not None:
866 new_prev_ts
= tweaked_timestamps_db
.get(
867 c_rev
.prev_rev
.unique_key(), None)
869 c_rev
.prev_timestamp
= new_prev_ts
872 if c_rev
.next_rev
is not None:
873 new_next_ts
= tweaked_timestamps_db
.get(
874 c_rev
.next_rev
.unique_key(), None)
876 c_rev
.next_timestamp
= new_next_ts
878 # Remove all references to excluded tags and branches
879 def not_excluded(symbol
, excludes
=excludes
):
880 return not excludes
.has_key(symbol
)
881 c_rev
.branches
= filter(not_excluded
, c_rev
.branches
)
882 c_rev
.tags
= filter(not_excluded
, c_rev
.tags
)
884 # Convert all branches that are forced to be tags
885 for forced_tag
in Ctx().forced_tags
:
886 if forced_tag
in c_rev
.branches
:
887 c_rev
.branches
.remove(forced_tag
)
888 c_rev
.tags
.append(forced_tag
)
890 # Convert all tags that are forced to be branches
891 for forced_branch
in Ctx().forced_branches
:
892 if forced_branch
in c_rev
.tags
:
893 c_rev
.tags
.remove(forced_branch
)
894 c_rev
.branches
.append(forced_branch
)
896 # see if this is "near" any of the resync records we
897 # have recorded for this digest [of the log message].
898 for record
in resync
.get(c_rev
.digest
, []):
899 if record
[2] == c_rev
.timestamp
:
900 # This means that either c_rev is the same revision that
901 # caused the resync record to exist, or c_rev is a different
902 # CVS revision that happens to have the same timestamp. In
903 # either case, we don't have to do anything, so we...
906 if record
[0] <= c_rev
.timestamp
<= record
[1]:
907 # bingo! We probably want to remap the time on this c_rev,
908 # unless the remapping would be useless because the new time
909 # would fall outside the COMMIT_THRESHOLD window for this
911 new_timestamp
= record
[2]
912 # If the new timestamp is earlier than that of our previous revision
913 if new_timestamp
< c_rev
.prev_timestamp
:
914 desc
= ("%s: Attempt to set timestamp of revision %s on file %s"
915 + " to time %s, which is before previous the time of"
916 + " revision %s (%s):")
917 Log().write(Log
.WARN
, desc
% (warning_prefix
, c_rev
.rev
,
918 c_rev
.cvs_path
, new_timestamp
,
920 c_rev
.prev_timestamp
))
921 # If resyncing our rev to c_rev.prev_timestamp + 1 will place
922 # the timestamp of c_rev within COMMIT_THRESHOLD of the
923 # attempted resync time, then sync back to c_rev.prev_timestamp
925 if ((c_rev
.prev_timestamp
+ 1) - new_timestamp
) \
926 < config
.COMMIT_THRESHOLD
:
927 new_timestamp
= c_rev
.prev_timestamp
+ 1
928 Log().write(Log
.WARN
, "%s: Time set to %s" % (warning_prefix
,
931 Log().write(Log
.WARN
, "%s: Timestamp left untouched" %
935 # If the new timestamp is later than that of our next revision
936 elif c_rev
.next_timestamp
and new_timestamp
> c_rev
.next_timestamp
:
937 desc
= ("%s: Attempt to set timestamp of revision %s on file %s"
938 + " to time %s, which is after time of next"
939 + " revision %s (%s):")
940 Log().write(Log
.WARN
, desc
% (warning_prefix
, c_rev
.rev
,
941 c_rev
.cvs_path
, new_timestamp
,
943 c_rev
.next_timestamp
))
944 # If resyncing our rev to c_rev.next_timestamp - 1 will place
945 # the timestamp of c_rev within COMMIT_THRESHOLD of the
946 # attempted resync time, then sync forward to c_rev.next_timestamp
948 if (new_timestamp
- (c_rev
.next_timestamp
- 1)) \
949 < config
.COMMIT_THRESHOLD
:
950 new_timestamp
= c_rev
.next_timestamp
- 1
951 Log().write(Log
.WARN
, "%s: Time set to %s" % (warning_prefix
,
954 Log().write(Log
.WARN
, "%s: Timestamp left untouched" %
958 # Fix for Issue #71: Avoid resyncing two consecutive revisions
959 # to the same timestamp.
960 elif (new_timestamp
== c_rev
.prev_timestamp
961 or new_timestamp
== c_rev
.next_timestamp
):
964 # adjust the time range. we want the COMMIT_THRESHOLD from the
965 # bounds of the earlier/latest commit in this group.
966 record
[0] = min(record
[0],
967 c_rev
.timestamp
- config
.COMMIT_THRESHOLD
/2)
968 record
[1] = max(record
[1],
969 c_rev
.timestamp
+ config
.COMMIT_THRESHOLD
/2)
971 msg
= "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
972 % (c_rev
.cvs_path
, c_rev
.rev
, time
.ctime(c_rev
.timestamp
),
973 new_timestamp
- c_rev
.timestamp
)
974 Log().write(Log
.VERBOSE
, msg
)
976 c_rev
.timestamp
= new_timestamp
977 tweaked_timestamps_db
[c_rev
.unique_key()] = new_timestamp
979 # stop looking for hits
982 output
.write(str(c_rev
) + "\n")
983 Log().write(Log
.QUIET
, "Done")
class SortRevsPass(Pass):
  """Sort the cleaned-up CVS revision file into commit order.

  This pass was formerly known as pass3."""

  def register_artifacts(self):
    # Produces the sorted revisions file; consumes the cleaned revisions
    # file written by the previous (resync) pass.
    self._register_temp_file(config.SORTED_REVS_DATAFILE)
    self._register_temp_file_needed(config.CLEAN_REVS_DATAFILE)

  def run(self):
    # NOTE(review): the 'def run(self):' header line was lost in
    # extraction; restored here to match the other Pass subclasses.
    Log().write(Log.QUIET, "Sorting CVS revisions...")
    sort_file(artifact_manager.get_temp_file(config.CLEAN_REVS_DATAFILE),
              artifact_manager.get_temp_file(config.SORTED_REVS_DATAFILE))
    Log().write(Log.QUIET, "Done")
class CreateDatabasesPass(Pass):
  """Copy the sorted revisions from the flat file into databases.

  This pass was formerly known as pass4."""

  def register_artifacts(self):
    # SYMBOL_LAST_CVS_REVS_DB is only produced when symbols are being
    # converted at all.
    if not Ctx().trunk_only:
      self._register_temp_file(config.SYMBOL_LAST_CVS_REVS_DB)
    self._register_temp_file(config.CVS_REVS_DB)
    self._register_temp_file_needed(config.SORTED_REVS_DATAFILE)

  def run(self):
    """Iterate through sorted revs, storing them in a database.
    If we're not doing a trunk-only conversion, generate the
    LastSymbolicNameDatabase, which contains the last CVSRevision
    that is a source for each tag or branch."""

    Log().write(Log.QUIET,
                "Copying CVS revision data from flat file to database...")
    cvs_revs_db = CVSRevisionDatabase(database.DB_OPEN_NEW)
    if not Ctx().trunk_only:
      Log().write(Log.QUIET,
                  "Finding last CVS revisions for all symbolic names...")
      last_sym_name_db = LastSymbolicNameDatabase()
    else:
      # This is to avoid testing Ctx().trunk_only every time around the loop
      # NOTE(review): the scaffolding of this else-branch (original
      # lines ~1024-1028) was partially lost in extraction; the no-op
      # stand-in below is reconstructed from the surviving
      # 'def noop(*args): pass', 'create_database = noop' and
      # 'last_sym_name_db = DummyLSNDB()' lines -- verify against
      # upstream cvs2svn.
      class DummyLSNDB:
        def noop(*args): pass
        log_revision = noop
        create_database = noop
      last_sym_name_db = DummyLSNDB()

    for line in fileinput.FileInput(
        artifact_manager.get_temp_file(config.SORTED_REVS_DATAFILE)):
      c_rev = cvs_revision.parse_cvs_revision(Ctx(), line[:-1])
      cvs_revs_db.log_revision(c_rev)
      last_sym_name_db.log_revision(c_rev)
      StatsKeeper().record_c_rev(c_rev)
    StatsKeeper().set_stats_reflect_exclude(True)

    # On a trunk-only run this is the DummyLSNDB no-op.
    last_sym_name_db.create_database()
    StatsKeeper().archive()
    Log().write(Log.QUIET, "Done")
class AggregateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  CVSCommit._commit also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self._register_temp_file(config.SYMBOL_CLOSINGS_TMP)
    self._register_temp_file(config.SVN_REVNUMS_TO_CVS_REVS)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    if not Ctx().trunk_only:
      self._register_temp_file_needed(config.SYMBOL_LAST_CVS_REVS_DB)
    self._register_temp_file_needed(config.CVS_REVS_DB)
    self._register_temp_file_needed(config.TAGS_DB)
    self._register_temp_file_needed(config.DEFAULT_BRANCHES_DB)
    self._register_temp_file_needed(config.METADATA_DB)
    self._register_temp_file_needed(config.SORTED_REVS_DATAFILE)

  def run(self):
    Log().write(Log.QUIET, "Mapping CVS revisions to Subversion commits...")

    aggregator = CVSRevisionAggregator()
    for line in fileinput.FileInput(
        artifact_manager.get_temp_file(config.SORTED_REVS_DATAFILE)):
      c_rev = cvs_revision.parse_cvs_revision(Ctx(), line[:-1])
      # On a trunk-only conversion, branch revisions are dropped here.
      if not (Ctx().trunk_only and c_rev.branch_name is not None):
        aggregator.process_revision(c_rev)
    # NOTE(review): original lines 1074-1075 were lost in extraction;
    # flushing any pending commits out of the aggregator here matches
    # upstream cvs2svn -- verify.
    aggregator.flush()

    StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
    StatsKeeper().archive()
    Log().write(Log.QUIET, "Done")
class SortSymbolsPass(Pass):
  """Sort the symbolic-name openings/closings file.

  This pass was formerly known as pass6."""

  def register_artifacts(self):
    # Symbol files exist only when symbols are being converted.
    if not Ctx().trunk_only:
      self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
      self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self):
    # NOTE(review): the 'def run(self):' header and the 'sort_file('
    # call line were lost in extraction; restored to match the sibling
    # SortRevsPass.
    Log().write(Log.QUIET, "Sorting symbolic name source revisions...")

    if not Ctx().trunk_only:
      sort_file(
          artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
          artifact_manager.get_temp_file(
              config.SYMBOL_OPENINGS_CLOSINGS_SORTED))
    Log().write(Log.QUIET, "Done")
class IndexSymbolsPass(Pass):
  """Record where each symbolic name first appears in the sorted file.

  This pass was formerly known as pass7."""

  def register_artifacts(self):
    if not Ctx().trunk_only:
      self._register_temp_file(config.SYMBOL_OFFSETS_DB)
      self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def run(self):
    Log().write(Log.QUIET, "Determining offsets for all symbolic names...")

    def generate_offsets_for_symbolings():
      """This function iterates through all the lines in
      SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
      SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
      where SYMBOLIC_NAME is first encountered.  This will allow us to
      seek to the various offsets in the file and sequentially read only
      the openings and closings that we need."""

      ###PERF This is a fine example of a db that can be in-memory and
      #just flushed to disk when we're done.  Later, it can just be sucked
      #back into memory.
      offsets_db = database.Database(
          artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB),
          database.DB_OPEN_NEW)

      # NOTE(review): the file-open and read-loop scaffolding below
      # (original lines ~1125-1141, partially lost in extraction) is
      # reconstructed: remember the file position before each line and
      # record the offset of the first line seen for each symbolic name.
      # The surviving lines ('line = file.readline()', the split, the
      # Log write and 'offsets_db[sym] = fpos') are kept verbatim --
      # verify the scaffolding against upstream cvs2svn.
      file = open(
          artifact_manager.get_temp_file(
              config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
          'r')
      old_sym = ""
      while 1:
        fpos = file.tell()
        line = file.readline()
        if not line:
          break
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(Log.VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos

    if not Ctx().trunk_only:
      generate_offsets_for_symbolings()
    Log().write(Log.QUIET, "Done.")
class OutputPass(Pass):
  """Feed the ordered SVN commits to the repository mirror.

  This pass was formerly known as pass8."""

  def register_artifacts(self):
    self._register_temp_file(config.SVN_MIRROR_REVISIONS_DB)
    self._register_temp_file(config.SVN_MIRROR_NODES_DB)
    self._register_temp_file_needed(config.CVS_REVS_DB)
    self._register_temp_file_needed(config.TAGS_DB)
    self._register_temp_file_needed(config.METADATA_DB)
    self._register_temp_file_needed(config.SVN_REVNUMS_TO_CVS_REVS)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    if not Ctx().trunk_only:
      self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
      self._register_temp_file_needed(config.SYMBOL_OFFSETS_DB)

  def run(self):
    svncounter = 2 # Repository initialization is 1.
    repos = SVNRepositoryMirror()
    persistence_manager = PersistenceManager(database.DB_OPEN_READ)

    # NOTE(review): the branch structure around the two delegate
    # registrations (original lines 1166-1175, partially lost in
    # extraction) is reconstructed.  Both 'if not Ctx().dry_run:' guards
    # and both add_delegate/Log pairs survive verbatim; the outer
    # target-vs-dumpfile choice and the exact nesting of the Log calls
    # are inferred -- verify against upstream cvs2svn.
    if Ctx().target:
      if not Ctx().dry_run:
        repos.add_delegate(RepositoryDelegate())
        Log().write(Log.QUIET, "Starting Subversion Repository.")
    else:
      if not Ctx().dry_run:
        repos.add_delegate(DumpfileDelegate())
        Log().write(Log.QUIET, "Starting Subversion Dumpfile.")

    repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

    # Feed each SVNCommit, in revision order, to the mirror.
    # NOTE(review): the loop scaffolding and the final close of the
    # repository mirror (original lines ~1177-1186) were lost in
    # extraction and are reconstructed here -- verify against upstream
    # cvs2svn.
    while 1:
      svn_commit = persistence_manager.get_svn_commit(svncounter)
      if not svn_commit:
        break
      repos.commit(svn_commit)
      svncounter += 1

    repos.finish()
# The complete list of conversion passes, in execution order.  Indices
# into this list (plus one) are what the user selects with -p START[:END].
# NOTE(review): only the 'pass_manager = PassManager([' header and the
# CreateDatabasesPass()/AggregateRevsPass() entries survived extraction
# (original lines 1189-1191 and 1194-1197 are missing).  The remaining
# entries are reconstructed from the Pass subclasses defined in this
# file and their "formerly known as passN" docstrings -- verify the
# names and order against upstream cvs2svn.
pass_manager = PassManager([
    CollectRevsPass(),
    ResyncRevsPass(),
    SortRevsPass(),
    CreateDatabasesPass(),
    AggregateRevsPass(),
    SortSymbolsPass(),
    IndexSymbolsPass(),
    OutputPass(),
    ])
1202 for i
in range(len(pass_manager
.passes
)):
1203 print '%5d : %s' % (i
+ 1, pass_manager
.passes
[i
].name
,)
def normalize_ttb_path(opt, path):
  """Normalize a path to be used for --trunk, --tags, or --branches.

  1. Strip leading, trailing, and duplicated '/'.
  2. Verify that the path is not empty.

  Return the normalized path.

  If the path is invalid, write an error message and exit."""

  # path_join collapses the empty components produced by leading,
  # trailing, or doubled slashes.
  norm_path = common.path_join(*path.split('/'))
  # NOTE(review): the guard and return lines were lost in extraction;
  # they are reconstructed from the surviving docstring ("Return the
  # normalized path", error on empty) and raise statement.
  if not norm_path:
    raise FatalError("cannot pass an empty path to %s." % (opt,))
  return norm_path
1223 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
1224 % os
.path
.basename(sys
.argv
[0])
1225 print ' --help, -h print this usage message and exit with success'
1226 print ' --help-passes list the available passes and their numbers'
1227 print ' --version print the version number'
1230 print ' -s PATH path for SVN repos'
1231 print ' -p START[:END] start at pass START, end at pass END of %d' \
1232 % len(pass_manager
.passes
)
1233 print ' If only START is given, run only pass START'
1234 print ' (implicitly enables --skip-cleanup)'
1235 print ' --existing-svnrepos load into existing SVN repository'
1236 print ' --dumpfile=PATH name of intermediate svn dumpfile'
1237 print ' --tmpdir=PATH directory to use for tmp data (default to cwd)'
1238 print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
1239 print ' --dry-run do not create a repository or a dumpfile;'
1240 print ' just print what would happen.'
1241 print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
1242 print ' (only use this if having problems with RCS)'
1243 print ' --svnadmin=PATH path to the svnadmin program'
1244 print ' --trunk-only convert only trunk commits, not tags nor branches'
1245 print ' --trunk=PATH path for trunk (default: %s)' \
1247 print ' --branches=PATH path for branches (default: %s)' \
1248 % Ctx().branches_base
1249 print ' --tags=PATH path for tags (default: %s)' \
1251 print ' --no-prune don\'t prune empty directories'
1252 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
1253 print ' --encoding=ENC encoding of paths and log messages in CVS repos'
1254 print ' Multiple of these options may be passed, where they'
1255 print ' will be treated as an ordered list of encodings to'
1256 print ' attempt (with "ascii" as a hardcoded last resort)'
1257 print ' --force-branch=NAME force NAME to be a branch'
1258 print ' --force-tag=NAME force NAME to be a tag'
1259 print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
1260 print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
1261 print ' use Python regexp and reference syntax respectively'
1262 print ' --username=NAME username for cvs2svn-synthesized commits'
1263 print ' --skip-cleanup prevent the deletion of intermediate files'
1264 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
1265 print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
1266 print ' --cvs-revnums record CVS revision numbers as file properties'
1267 print ' --auto-props=FILE set file properties from the auto-props section'
1268 print ' of a file in svn config format'
1269 print ' --auto-props-ignore-case Ignore case when matching auto-props patterns'
1270 print ' --mime-types=FILE specify an apache-style mime.types file for'
1271 print ' setting svn:mime-type'
1272 print ' --eol-from-mime-type set svn:eol-style from mime type if known'
1273 print ' --no-default-eol don\'t set svn:eol-style to \'native\' for'
1274 print ' non-binary files with undetermined mime types'
1275 print ' --keywords-off don\'t set svn:keywords on any files (by default,'
1276 print ' cvs2svn sets svn:keywords on non-binary files to'
1277 print ' "%s")' % config
.SVN_KEYWORDS_VALUE
1281 # Convenience var, so we don't have to keep instantiating this Borg.
1286 end_pass
= len(pass_manager
.passes
)
1289 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'p:s:qvh',
1290 [ "help", "help-passes", "create", "trunk=",
1291 "username=", "existing-svnrepos",
1292 "branches=", "tags=", "encoding=",
1293 "force-branch=", "force-tag=", "exclude=",
1294 "use-cvs", "mime-types=",
1295 "auto-props=", "auto-props-ignore-case",
1296 "eol-from-mime-type", "no-default-eol",
1297 "trunk-only", "no-prune", "dry-run",
1298 "dump-only", "dumpfile=", "tmpdir=",
1299 "svnadmin=", "skip-cleanup", "cvs-revnums",
1300 "bdb-txn-nosync", "fs-type=",
1301 "version", "profile",
1302 "keywords-off", "symbol-transform="])
1303 except getopt
.GetoptError
, e
:
1304 sys
.stderr
.write(error_prefix
+ ': ' + str(e
) + '\n\n')
1308 for opt
, value
in opts
:
1309 if opt
== '--version':
1310 print '%s version %s' % (os
.path
.basename(sys
.argv
[0]), VERSION
)
1313 # Don't cleanup if we're doing incrementals.
1314 ctx
.skip_cleanup
= 1
1315 if value
.find(':') > 0:
1316 start_pass
, end_pass
= map(int, value
.split(':'))
1318 end_pass
= start_pass
= int(value
)
1319 if start_pass
> len(pass_manager
.passes
) or start_pass
< 1:
1321 'illegal value (%d) for starting pass. Must be 1 through %d.'
1322 % (int(start_pass
), len(pass_manager
.passes
),))
1323 if end_pass
< start_pass
or end_pass
> len(pass_manager
.passes
):
1325 'illegal value (%d) for ending pass. Must be %d through %d.'
1326 % (int(end_pass
), int(start_pass
), len(pass_manager
.passes
),))
1327 elif (opt
== '--help') or (opt
== '-h'):
1329 elif opt
== '--help-passes':
1333 Log().log_level
= Log
.VERBOSE
1336 Log().log_level
= Log
.QUIET
1340 elif opt
== '--existing-svnrepos':
1341 ctx
.existing_svnrepos
= 1
1342 elif opt
== '--dumpfile':
1343 ctx
.dumpfile
= value
1344 elif opt
== '--tmpdir':
1346 elif opt
== '--use-cvs':
1348 elif opt
== '--svnadmin':
1349 ctx
.svnadmin
= value
1350 elif opt
== '--trunk-only':
1352 elif opt
== '--trunk':
1353 ctx
.trunk_base
= normalize_ttb_path(opt
, value
)
1354 elif opt
== '--branches':
1355 ctx
.branches_base
= normalize_ttb_path(opt
, value
)
1356 elif opt
== '--tags':
1357 ctx
.tags_base
= normalize_ttb_path(opt
, value
)
1358 elif opt
== '--no-prune':
1360 elif opt
== '--dump-only':
1362 elif opt
== '--dry-run':
1364 elif opt
== '--encoding':
1365 ctx
.encoding
.insert(-1, value
)
1366 elif opt
== '--force-branch':
1367 ctx
.forced_branches
.append(value
)
1368 elif opt
== '--force-tag':
1369 ctx
.forced_tags
.append(value
)
1370 elif opt
== '--exclude':
1372 ctx
.excludes
.append(re
.compile('^' + value
+ '$'))
1374 raise FatalError("'%s' is not a valid regexp." % (value
,))
1375 elif opt
== '--mime-types':
1376 ctx
.mime_types_file
= value
1377 elif opt
== '--auto-props':
1378 ctx
.auto_props_file
= value
1379 elif opt
== '--auto-props-ignore-case':
1380 ctx
.auto_props_ignore_case
= True
1381 elif opt
== '--eol-from-mime-type':
1382 ctx
.eol_from_mime_type
= 1
1383 elif opt
== '--no-default-eol':
1384 ctx
.no_default_eol
= 1
1385 elif opt
== '--keywords-off':
1386 ctx
.keywords_off
= 1
1387 elif opt
== '--username':
1388 ctx
.username
= value
1389 elif opt
== '--skip-cleanup':
1390 ctx
.skip_cleanup
= 1
1391 elif opt
== '--cvs-revnums':
1392 ctx
.svn_property_setters
.append(
1393 property_setters
.CVSRevisionNumberSetter())
1394 elif opt
== '--bdb-txn-nosync':
1395 ctx
.bdb_txn_nosync
= 1
1396 elif opt
== '--fs-type':
1398 elif opt
== '--create':
1399 sys
.stderr
.write(warning_prefix
+
1400 ': The behaviour produced by the --create option is now the '
1401 'default,\nand passing the option is deprecated.\n')
1402 elif opt
== '--profile':
1404 elif opt
== '--symbol-transform':
1405 [pattern
, replacement
] = value
.split(":")
1407 pattern
= re
.compile(pattern
)
1409 raise FatalError("'%s' is not a valid regexp." % (pattern
,))
1410 ctx
.symbol_transforms
.append((pattern
, replacement
,))
1416 # Consistency check for options and arguments.
1422 sys
.stderr
.write(error_prefix
+
1423 ": must pass only one CVS repository.\n")
1430 ctx
.cvs_repository
= cvs_repository
.CVSRepositoryViaCVS(cvsroot
)
1432 ctx
.cvs_repository
= cvs_repository
.CVSRepositoryViaRCS(cvsroot
)
1434 if (not ctx
.target
) and (not ctx
.dump_only
) and (not ctx
.dry_run
):
1435 raise FatalError("must pass one of '-s' or '--dump-only'.")
1437 def not_both(opt1val
, opt1name
, opt2val
, opt2name
):
1438 if opt1val
and opt2val
:
1439 raise FatalError("cannot pass both '%s' and '%s'."
1440 % (opt1name
, opt2name
,))
1442 not_both(ctx
.target
, '-s',
1443 ctx
.dump_only
, '--dump-only')
1445 not_both(ctx
.dump_only
, '--dump-only',
1446 ctx
.existing_svnrepos
, '--existing-svnrepos')
1448 not_both(ctx
.bdb_txn_nosync
, '--bdb-txn-nosync',
1449 ctx
.existing_svnrepos
, '--existing-svnrepos')
1451 not_both(ctx
.dump_only
, '--dump-only',
1452 ctx
.bdb_txn_nosync
, '--bdb-txn-nosync')
1454 not_both(ctx
.quiet
, '-q',
1457 not_both(ctx
.fs_type
, '--fs-type',
1458 ctx
.existing_svnrepos
, '--existing-svnrepos')
1460 if ctx
.fs_type
and ctx
.fs_type
!= 'bdb' and ctx
.bdb_txn_nosync
:
1461 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
1464 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
1465 ctx
.project
= Project(ctx
.cvs_repository
.cvs_repos_path
,
1466 ctx
.trunk_base
, ctx
.branches_base
, ctx
.tags_base
)
1468 if ctx
.existing_svnrepos
and not os
.path
.isdir(ctx
.target
):
1469 raise FatalError("the svn-repos-path '%s' is not an "
1470 "existing directory." % ctx
.target
)
1472 if not ctx
.dump_only
and not ctx
.existing_svnrepos \
1473 and (not ctx
.dry_run
) and os
.path
.exists(ctx
.target
):
1474 raise FatalError("the svn-repos-path '%s' exists.\n"
1475 "Remove it, or pass '--existing-svnrepos'."
1478 if ctx
.target
and not ctx
.dry_run
:
1479 # Verify that svnadmin can be executed. The 'help' subcommand
1480 # should be harmless.
1482 check_command_runs([ctx
.svnadmin
, 'help'], 'svnadmin')
1483 except CommandFailedException
, e
:
1486 'svnadmin could not be executed. Please ensure that it is\n'
1487 'installed and/or use the --svnadmin option.' % (e
,))
1489 ctx
.svn_property_setters
.append(
1490 property_setters
.ExecutablePropertySetter())
1492 ctx
.svn_property_setters
.append(
1493 property_setters
.BinaryFileEOLStyleSetter())
1495 if ctx
.mime_types_file
:
1496 ctx
.svn_property_setters
.append(
1497 property_setters
.MimeMapper(ctx
.mime_types_file
))
1499 if ctx
.auto_props_file
:
1500 ctx
.svn_property_setters
.append(
1501 property_setters
.AutoPropsPropertySetter(
1502 ctx
.auto_props_file
, ctx
.auto_props_ignore_case
))
1504 ctx
.svn_property_setters
.append(
1505 property_setters
.BinaryFileDefaultMimeTypeSetter())
1507 if ctx
.eol_from_mime_type
:
1508 ctx
.svn_property_setters
.append(
1509 property_setters
.EOLStyleFromMimeTypeSetter())
1511 if ctx
.no_default_eol
:
1512 ctx
.svn_property_setters
.append(
1513 property_setters
.DefaultEOLStyleSetter(None))
1515 ctx
.svn_property_setters
.append(
1516 property_setters
.DefaultEOLStyleSetter('native'))
1518 if not ctx
.keywords_off
:
1519 ctx
.svn_property_setters
.append(
1520 property_setters
.KeywordsPropertySetter(config
.SVN_KEYWORDS_VALUE
))
1522 # Make sure the tmp directory exists. Note that we don't check if
1523 # it's empty -- we want to be able to use, for example, "." to hold
1524 # tempfiles. But if we *did* want check if it were empty, we'd do
1525 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
1526 if not os
.path
.exists(ctx
.tmpdir
):
1527 os
.mkdir(ctx
.tmpdir
)
1528 elif not os
.path
.isdir(ctx
.tmpdir
):
1530 "cvs2svn tried to use '%s' for temporary files, but that path\n"
1531 " exists and is not a directory. Please make it be a directory,\n"
1532 " or specify some other directory for temporary files."
1535 # But do lock the tmpdir, to avoid process clash.
1537 os
.mkdir(os
.path
.join(ctx
.tmpdir
, 'cvs2svn.lock'))
1539 if e
.errno
== errno
.EACCES
:
1540 raise FatalError("Permission denied:"
1541 + " No write access to directory '%s'." % ctx
.tmpdir
)
1542 if e
.errno
== errno
.EEXIST
:
1544 "cvs2svn is using directory '%s' for temporary files, but\n"
1545 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
1546 " cvs2svn process is currently using '%s' as its temporary\n"
1547 " workspace. If you are certain that is not the case,\n"
1548 " then remove the '%s/cvs2svn.lock' subdirectory."
1549 % (ctx
.tmpdir
, ctx
.tmpdir
, ctx
.tmpdir
, ctx
.tmpdir
,))
1554 prof
= hotshot
.Profile('cvs2svn.hotshot')
1555 prof
.runcall(pass_manager
.run
, start_pass
, end_pass
)
1558 pass_manager
.run(start_pass
, end_pass
)
1560 try: os
.rmdir(os
.path
.join(ctx
.tmpdir
, 'cvs2svn.lock'))
1564 if __name__
== '__main__':
1567 except FatalException
, e
:
1568 sys
.stderr
.write(str(e
))