1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
28 from cvs2svn_lib
.common
import InternalError
29 from cvs2svn_lib
.common
import FatalError
30 from cvs2svn_lib
.log
import Log
31 from cvs2svn_lib
.context
import Ctx
32 from cvs2svn_lib
.symbol
import Trunk
33 from cvs2svn_lib
.symbol
import Branch
34 from cvs2svn_lib
.symbol
import Tag
35 from cvs2svn_lib
.cvs_item
import CVSSymbol
36 from cvs2svn_lib
.dvcs_common
import DVCSOutputOption
37 from cvs2svn_lib
.dvcs_common
import MirrorUpdater
38 from cvs2svn_lib
.key_generator
import KeyGenerator
class ExpectedDirectoryError(Exception):
    """Signal that a file was encountered where a directory was required."""
class ExpectedFileError(Exception):
    """Signal that a directory was encountered where a file was required."""
53 class GitRevisionWriter(MirrorUpdater
):
55 def start(self
, mirror
, f
):
56 super(GitRevisionWriter
, self
).start(mirror
)
59 def _modify_file(self
, cvs_item
, post_commit
):
60 raise NotImplementedError()
def add_file(self, cvs_rev, post_commit):
    """Handle the addition of the file described by CVS_REV.

    The event is first forwarded to the parent class's add_file(),
    then CVS_REV is handed to self._modify_file().  POST_COMMIT is
    passed through unchanged to both calls."""

    parent = super(GitRevisionWriter, self)
    parent.add_file(cvs_rev, post_commit)
    # The actual output is produced by the _modify_file() hook.
    self._modify_file(cvs_rev, post_commit)
def modify_file(self, cvs_rev, post_commit):
    """Handle a modification of the file described by CVS_REV.

    The event is first forwarded to the parent class's modify_file(),
    then CVS_REV is handed to self._modify_file().  POST_COMMIT is
    passed through unchanged to both calls."""

    parent = super(GitRevisionWriter, self)
    parent.modify_file(cvs_rev, post_commit)
    # The actual output is produced by the _modify_file() hook.
    self._modify_file(cvs_rev, post_commit)
def delete_file(self, cvs_rev, post_commit):
    """Handle the deletion of the file described by CVS_REV.

    The event is first forwarded to the parent class's delete_file();
    afterwards a 'D <path>' line naming the file's cvs_path is
    written to self.f."""

    parent = super(GitRevisionWriter, self)
    parent.delete_file(cvs_rev, post_commit)
    delete_line = 'D %s\n' % (cvs_rev.cvs_file.cvs_path,)
    self.f.write(delete_line)
def branch_file(self, cvs_symbol):
    """Handle the copying of a file onto the symbol of CVS_SYMBOL.

    The event is first forwarded to the parent class's branch_file(),
    then CVS_SYMBOL is handed to self._modify_file() with post_commit
    fixed to False."""

    parent = super(GitRevisionWriter, self)
    parent.branch_file(cvs_symbol)
    # Symbol items always go through the hook as non-post-commit.
    self._modify_file(cvs_symbol, post_commit=False)
79 super(GitRevisionWriter
, self
).finish()
83 class GitRevisionMarkWriter(GitRevisionWriter
):
84 def _modify_file(self
, cvs_item
, post_commit
):
85 if cvs_item
.cvs_file
.executable
:
92 % (mode
, cvs_item
.revision_reader_token
,
93 cvs_item
.cvs_file
.cvs_path
,)
97 class GitRevisionInlineWriter(GitRevisionWriter
):
98 def __init__(self
, revision_reader
):
99 self
.revision_reader
= revision_reader
def register_artifacts(self, which_pass):
    """Register artifacts for WHICH_PASS.

    Delegates first to GitRevisionWriter.register_artifacts() and
    then to the revision reader's register_artifacts(), passing
    WHICH_PASS to each."""

    GitRevisionWriter.register_artifacts(self, which_pass)
    reader = self.revision_reader
    reader.register_artifacts(which_pass)
def start(self, mirror, f):
    """Start this writer and then its revision reader.

    MIRROR and F are handed on unchanged to GitRevisionWriter.start();
    the revision reader's start() is invoked afterwards."""

    GitRevisionWriter.start(self, mirror, f)
    reader = self.revision_reader
    reader.start()
109 def _modify_file(self
, cvs_item
, post_commit
):
110 if cvs_item
.cvs_file
.executable
:
117 % (mode
, cvs_item
.cvs_file
.cvs_path
,)
120 if isinstance(cvs_item
, CVSSymbol
):
121 cvs_rev
= cvs_item
.get_cvs_revision_source(Ctx()._cvs
_items
_db
)
125 # FIXME: We have to decide what to do about keyword substitution
126 # and eol_style here:
127 fulltext
= self
.revision_reader
.get_content(
128 cvs_rev
, suppress_keyword_substitution
=False
131 self
.f
.write('data %d\n' % (len(fulltext
),))
132 self
.f
.write(fulltext
)
136 GitRevisionWriter
.finish(self
)
137 self
.revision_reader
.finish()
140 class GitOutputOption(DVCSOutputOption
):
141 """An OutputOption that outputs to a git-fast-import formatted file.
145 dump_filename -- (string) the name of the file to which the
146 git-fast-import commands for defining revisions will be
149 author_transforms -- a map from CVS author names to git full name
150 and email address. See
151 DVCSOutputOption.normalize_author_transforms() for information
152 about the form of this parameter.
158 # The first mark number used for git-fast-import commit marks. This
159 # value needs to be large to avoid conflicts with blob marks.
160 _first_commit_mark
= 1000000000
163 self
, dump_filename
, revision_writer
,
164 author_transforms
=None,
165 tie_tag_fixup_branches
=False,
169 DUMP_FILENAME is the name of the file to which the git-fast-import
170 commands for defining revisions should be written. (Please note
171 that depending on the style of revision writer, the actual file
172 contents might not be written to this file.)
174 REVISION_WRITER is a GitRevisionWriter that is used to output
175 either the content of revisions or a mark that was previously used
178 AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
179 CVS author names to git full name and email address. All of the
180 contents should either be Unicode strings or 8-bit strings encoded
183 TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
184 fixup branch, it should be pseudo-merged (ancestry linked but no
185 content changes) back into its source branch, to dispose of the
189 DVCSOutputOption
.__init
__(self
)
190 self
.dump_filename
= dump_filename
191 self
.revision_writer
= revision_writer
193 self
.author_transforms
= self
.normalize_author_transforms(
197 self
.tie_tag_fixup_branches
= tie_tag_fixup_branches
199 self
._mark
_generator
= KeyGenerator(GitOutputOption
._first
_commit
_mark
)
def register_artifacts(self, which_pass):
    """Register artifacts for WHICH_PASS.

    Delegates first to DVCSOutputOption.register_artifacts() and
    then to the revision writer's register_artifacts(), passing
    WHICH_PASS to each."""

    DVCSOutputOption.register_artifacts(self, which_pass)
    writer = self.revision_writer
    writer.register_artifacts(which_pass)
205 def check_symbols(self
, symbol_map
):
206 # FIXME: What constraints does git impose on symbols?
209 def setup(self
, svn_rev_count
):
210 DVCSOutputOption
.setup(self
, svn_rev_count
)
211 self
.f
= open(self
.dump_filename
, 'wb')
213 # The youngest revnum that has been committed so far:
216 # A map {lod : [(revnum, mark)]} giving each of the revision
217 # numbers in which there was a commit to lod, and the mark active
218 # at the end of the revnum.
221 self
.revision_writer
.start(self
._mirror
, self
.f
)
223 def _create_commit_mark(self
, lod
, revnum
):
224 mark
= self
._mark
_generator
.gen_id()
225 self
._set
_lod
_mark
(lod
, revnum
, mark
)
228 def _set_lod_mark(self
, lod
, revnum
, mark
):
229 """Record MARK as the status of LOD for REVNUM.
231 If there is already an entry for REVNUM, overwrite it. If not,
232 append a new entry to the self._marks list for LOD."""
234 assert revnum
>= self
._youngest
235 entry
= (revnum
, mark
)
237 modifications
= self
._marks
[lod
]
239 # This LOD hasn't appeared before; create a new list and add the
241 self
._marks
[lod
] = [entry
]
243 # A record exists, so it necessarily has at least one element:
244 if modifications
[-1][0] == revnum
:
245 modifications
[-1] = entry
247 modifications
.append(entry
)
248 self
._youngest
= revnum
250 def _get_author(self
, svn_commit
):
251 """Return the author to be used for SVN_COMMIT.
253 Return the author as a UTF-8 string in the form needed by git
254 fast-import; that is, 'name <email>'."""
256 cvs_author
= svn_commit
.get_author()
257 return self
._map
_author
(cvs_author
)
259 def _map_author(self
, cvs_author
):
260 return self
.author_transforms
.get(cvs_author
, "%s <>" % (cvs_author
,))
263 def _get_log_msg(svn_commit
):
264 return svn_commit
.get_log_msg()
def process_initial_project_commit(self, svn_commit):
    """Process the initial project commit SVN_COMMIT.

    Nothing is written to the output; a commit for
    SVN_COMMIT.revnum is merely opened on the mirror and closed
    again immediately."""

    mirror = self._mirror
    mirror.start_commit(svn_commit.revnum)
    mirror.end_commit()
270 def process_primary_commit(self
, svn_commit
):
271 author
= self
._get
_author
(svn_commit
)
272 log_msg
= self
._get
_log
_msg
(svn_commit
)
275 for cvs_rev
in svn_commit
.get_cvs_items():
276 lods
.add(cvs_rev
.lod
)
278 raise InternalError('Commit affects %d LODs' % (len(lods
),))
281 self
._mirror
.start_commit(svn_commit
.revnum
)
282 if isinstance(lod
, Trunk
):
283 # FIXME: is this correct?:
284 self
.f
.write('commit refs/heads/master\n')
286 self
.f
.write('commit refs/heads/%s\n' % (lod
.name
,))
289 % (self
._create
_commit
_mark
(lod
, svn_commit
.revnum
),)
292 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
294 self
.f
.write('data %d\n' % (len(log_msg
),))
295 self
.f
.write('%s\n' % (log_msg
,))
296 for cvs_rev
in svn_commit
.get_cvs_items():
297 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=False)
300 self
._mirror
.end_commit()
302 def process_post_commit(self
, svn_commit
):
303 author
= self
._get
_author
(svn_commit
)
304 log_msg
= self
._get
_log
_msg
(svn_commit
)
307 for cvs_rev
in svn_commit
.cvs_revs
:
308 source_lods
.add(cvs_rev
.lod
)
309 if len(source_lods
) != 1:
310 raise InternalError('Commit is from %d LODs' % (len(source_lods
),))
311 source_lod
= source_lods
.pop()
313 self
._mirror
.start_commit(svn_commit
.revnum
)
314 # FIXME: is this correct?:
315 self
.f
.write('commit refs/heads/master\n')
318 % (self
._create
_commit
_mark
(None, svn_commit
.revnum
),)
321 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
323 self
.f
.write('data %d\n' % (len(log_msg
),))
324 self
.f
.write('%s\n' % (log_msg
,))
327 % (self
._get
_source
_mark
(source_lod
, svn_commit
.revnum
),)
329 for cvs_rev
in svn_commit
.cvs_revs
:
330 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=True)
333 self
._mirror
.end_commit()
335 def _get_source_mark(self
, source_lod
, revnum
):
336 """Return the mark active on SOURCE_LOD at the end of REVNUM."""
338 modifications
= self
._marks
[source_lod
]
339 i
= bisect
.bisect_left(modifications
, (revnum
+ 1,)) - 1
340 (revnum
, mark
) = modifications
[i
]
343 def describe_lod_to_user(self
, lod
):
344 """This needs to make sense to users of the fastimported result."""
345 if isinstance(lod
, Trunk
):
350 def _describe_commit(self
, svn_commit
, lod
):
351 author
= self
._map
_author
(svn_commit
.get_author())
352 if author
.endswith(" <>"):
354 date
= time
.strftime(
355 "%Y-%m-%d %H:%M:%S UTC", time
.gmtime(svn_commit
.date
)
357 log_msg
= svn_commit
.get_log_msg()
358 if log_msg
.find('\n') != -1:
359 log_msg
= log_msg
[:log_msg
.index('\n')]
360 return "%s %s %s '%s'" % (
361 self
.describe_lod_to_user(lod
), date
, author
, log_msg
,)
363 def _process_symbol_commit(
364 self
, svn_commit
, git_branch
, source_groups
, mark
366 author
= self
._get
_author
(svn_commit
)
367 log_msg
= self
._get
_log
_msg
(svn_commit
)
369 # Get the primary parent
370 p_source_lod
, p_source_revnum
, p_cvs_symbols
= source_groups
[0]
372 p_source_node
= self
._mirror
.get_old_lod_directory(
373 p_source_lod
, p_source_revnum
376 raise InternalError('Source %r does not exist' % (p_source_lod
,))
377 cvs_files_to_delete
= set(self
._get
_all
_files
(p_source_node
))
379 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
:
380 for cvs_symbol
in cvs_symbols
:
381 cvs_files_to_delete
.discard(cvs_symbol
.cvs_file
)
383 # Write a trailer to the log message which describes the cherrypicks that
384 # make up this symbol creation.
386 log_msg
+= "\nSprout from %s" % (
387 self
._describe
_commit
(
388 Ctx()._persistence
_manager
.get_svn_commit(p_source_revnum
),
392 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
[1:]:
393 log_msg
+= "\nCherrypick from %s:" % (
394 self
._describe
_commit
(
395 Ctx()._persistence
_manager
.get_svn_commit(source_revnum
),
399 for cvs_symbol
in cvs_symbols
:
400 log_msg
+= "\n %s" % (cvs_symbol
.cvs_file
.cvs_path
,)
401 if len(cvs_files_to_delete
):
402 log_msg
+= "\nDelete:"
403 for cvs_file
in sorted(cvs_files_to_delete
):
404 log_msg
+= "\n %s" % (cvs_file
.cvs_path
,)
406 self
.f
.write('commit %s\n' % (git_branch
,))
407 self
.f
.write('mark :%d\n' % (mark
,))
408 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
409 self
.f
.write('data %d\n' % (len(log_msg
),))
410 self
.f
.write('%s\n' % (log_msg
,))
412 # Only record actual DVCS ancestry for the primary sprout parent,
413 # all the rest are effectively cherrypicks.
416 % (self
._get
_source
_mark
(p_source_lod
, p_source_revnum
),)
419 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
:
420 for cvs_symbol
in cvs_symbols
:
421 self
.revision_writer
.branch_file(cvs_symbol
)
423 for cvs_file
in cvs_files_to_delete
:
424 self
.f
.write('D %s\n' % (cvs_file
.cvs_path
,))
428 def process_branch_commit(self
, svn_commit
):
429 self
._mirror
.start_commit(svn_commit
.revnum
)
431 source_groups
= list(self
._get
_source
_groups
(svn_commit
))
432 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
433 (source_lod
, source_revnum
, cvs_symbols
) = source_groups
[0]
435 '%s will be created via a simple copy from %s:r%d'
436 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
438 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
439 self
._set
_symbol
(svn_commit
.symbol
, mark
)
440 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
441 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
444 '%s will be created via fixup commit(s)' % (svn_commit
.symbol
,)
446 self
._process
_symbol
_commit
(
447 svn_commit
, 'refs/heads/%s' % (svn_commit
.symbol
.name
,),
449 self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
),
452 self
._mirror
.end_commit()
454 def _set_symbol(self
, symbol
, mark
):
455 if isinstance(symbol
, Branch
):
457 elif isinstance(symbol
, Tag
):
460 raise InternalError()
461 self
.f
.write('reset refs/%s/%s\n' % (category
, symbol
.name
,))
462 self
.f
.write('from :%d\n' % (mark
,))
def get_tag_fixup_branch_name(self, svn_commit):
    """Return the git ref to use for "tag fixup branches".

    The same fixed name is returned whatever SVN_COMMIT is.

    The git-fast-import documentation suggests 'TAG_FIXUP' (outside
    of the refs/heads namespace), but that is currently broken.  The
    name used instead contains a '.', which is not allowed in CVS
    symbols, so it cannot clash with a converted symbol (though a
    clash could still result if the user requests symbol
    transformations)."""

    fixup_ref = 'refs/heads/TAG.FIXUP'
    return fixup_ref
473 def process_tag_commit(self
, svn_commit
):
474 # FIXME: For now we create a fixup branch with the same name as
475 # the tag, then the tag. We never delete the fixup branch.
476 self
._mirror
.start_commit(svn_commit
.revnum
)
478 source_groups
= list(self
._get
_source
_groups
(svn_commit
))
479 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
480 (source_lod
, source_revnum
, cvs_symbols
) = source_groups
[0]
482 '%s will be created via a simple copy from %s:r%d'
483 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
485 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
486 self
._set
_symbol
(svn_commit
.symbol
, mark
)
487 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
488 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
491 '%s will be created via a fixup branch' % (svn_commit
.symbol
,)
494 fixup_branch_name
= self
.get_tag_fixup_branch_name(svn_commit
)
496 # Create the fixup branch (which might involve making more than
498 mark
= self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
)
499 self
._process
_symbol
_commit
(
500 svn_commit
, fixup_branch_name
, source_groups
, mark
503 # Store the mark of the last commit to the fixup branch as the
505 self
._set
_symbol
(svn_commit
.symbol
, mark
)
506 self
.f
.write('reset %s\n' % (fixup_branch_name
,))
509 if self
.tie_tag_fixup_branches
:
510 source_lod
= source_groups
[0][0]
511 source_lod_git_branch
= \
512 'refs/heads/%s' % (getattr(source_lod
, 'name', 'master'),)
514 mark2
= self
._create
_commit
_mark
(source_lod
, svn_commit
.revnum
)
515 author
= self
._map
_author
(Ctx().username
)
516 log_msg
= self
._get
_log
_msg
_for
_ancestry
_tie
(svn_commit
)
518 self
.f
.write('commit %s\n' % (source_lod_git_branch
,))
519 self
.f
.write('mark :%d\n' % (mark2
,))
520 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
521 self
.f
.write('data %d\n' % (len(log_msg
),))
522 self
.f
.write('%s\n' % (log_msg
,))
531 self
._mirror
.end_commit()
533 def _get_log_msg_for_ancestry_tie(self
, svn_commit
):
534 return Ctx().text_wrapper
.fill(
535 Ctx().tie_tag_ancestry_message
% {
536 'symbol_name' : svn_commit
.symbol
.name
,
541 DVCSOutputOption
.cleanup(self
)
542 self
.revision_writer
.finish()