1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
28 from cvs2svn_lib
.common
import InternalError
29 from cvs2svn_lib
.common
import FatalError
30 from cvs2svn_lib
.log
import Log
31 from cvs2svn_lib
.context
import Ctx
32 from cvs2svn_lib
.symbol
import Trunk
33 from cvs2svn_lib
.symbol
import Branch
34 from cvs2svn_lib
.symbol
import Tag
35 from cvs2svn_lib
.cvs_item
import CVSSymbol
36 from cvs2svn_lib
.dvcs_common
import DVCSOutputOption
37 from cvs2svn_lib
.dvcs_common
import MirrorUpdater
38 from cvs2svn_lib
.key_generator
import KeyGenerator
class ExpectedDirectoryError(Exception):
    """Signals that a directory was expected but a file was found instead."""
class ExpectedFileError(Exception):
    """Signals that a file was expected but a directory was found instead."""
53 class GitRevisionWriter(MirrorUpdater
):
55 def start(self
, mirror
, f
):
56 super(GitRevisionWriter
, self
).start(mirror
)
59 def _modify_file(self
, cvs_item
, post_commit
):
60 raise NotImplementedError()
def add_file(self, cvs_rev, post_commit):
    """Process the addition of CVS_REV.

    The parent class handles the addition first; afterwards CVS_REV
    and POST_COMMIT are handed on to _modify_file().
    """
    parent = super(GitRevisionWriter, self)
    parent.add_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)
def modify_file(self, cvs_rev, post_commit):
    """Process a modification of CVS_REV.

    The parent class handles the modification first; afterwards
    CVS_REV and POST_COMMIT are handed on to _modify_file().
    """
    parent = super(GitRevisionWriter, self)
    parent.modify_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)
def delete_file(self, cvs_rev, post_commit):
    """Process the deletion of CVS_REV.

    After the parent class has handled the deletion, write a 'D'
    line naming the file's cvs_path to the output stream self.f.
    """
    super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit)
    deleted_path = cvs_rev.cvs_file.cvs_path
    self.f.write('D %s\n' % (deleted_path,))
def branch_file(self, cvs_symbol):
    """Process the copying of a file onto a symbol, CVS_SYMBOL.

    The parent class handles the copy first; CVS_SYMBOL is then
    handed on to _modify_file(), always with post_commit=False.
    """
    parent = super(GitRevisionWriter, self)
    parent.branch_file(cvs_symbol)
    self._modify_file(cvs_symbol, post_commit=False)
79 super(GitRevisionWriter
, self
).finish()
83 class GitRevisionMarkWriter(GitRevisionWriter
):
84 def _modify_file(self
, cvs_item
, post_commit
):
85 if cvs_item
.cvs_file
.executable
:
92 % (mode
, cvs_item
.revision_reader_token
,
93 cvs_item
.cvs_file
.cvs_path
,)
97 class GitRevisionInlineWriter(GitRevisionWriter
):
98 def __init__(self
, revision_reader
):
99 self
.revision_reader
= revision_reader
def register_artifacts(self, which_pass):
    """Register artifacts needed during WHICH_PASS.

    Registration is done first through GitRevisionWriter and then
    through the wrapped revision reader.
    """
    GitRevisionWriter.register_artifacts(self, which_pass)
    reader = self.revision_reader
    reader.register_artifacts(which_pass)
def start(self, mirror, f):
    """Begin writing revisions.

    MIRROR and F are passed on to GitRevisionWriter.start(); after
    that the wrapped revision reader is started as well.
    """
    GitRevisionWriter.start(self, mirror, f)
    reader = self.revision_reader
    reader.start()
109 def _modify_file(self
, cvs_item
, post_commit
):
110 if cvs_item
.cvs_file
.executable
:
117 % (mode
, cvs_item
.cvs_file
.cvs_path
,)
120 if isinstance(cvs_item
, CVSSymbol
):
121 cvs_rev
= cvs_item
.get_cvs_revision_source(Ctx()._cvs
_items
_db
)
125 # FIXME: We have to decide what to do about keyword substitution
126 # and eol_style here:
127 stream
= self
.revision_reader
.get_content_stream(
128 cvs_rev
, suppress_keyword_substitution
=False
130 fulltext
= stream
.read()
133 self
.f
.write('data %d\n' % (len(fulltext
),))
134 self
.f
.write(fulltext
)
138 GitRevisionWriter
.finish(self
)
139 self
.revision_reader
.finish()
142 class GitOutputOption(DVCSOutputOption
):
143 """An OutputOption that outputs to a git-fast-import formatted file.
147 dump_filename -- (string) the name of the file to which the
148 git-fast-import commands for defining revisions will be
151 author_transforms -- a map from CVS author names to git full name
152 and email address. See
153 DVCSOutputOption.normalize_author_transforms() for information
154 about the form of this parameter.
160 # The first mark number used for git-fast-import commit marks. This
161 # value needs to be large to avoid conflicts with blob marks.
162 _first_commit_mark
= 1000000000
165 self
, dump_filename
, revision_writer
,
166 author_transforms
=None,
167 tie_tag_fixup_branches
=False,
171 DUMP_FILENAME is the name of the file to which the git-fast-import
172 commands for defining revisions should be written. (Please note
173 that depending on the style of revision writer, the actual file
174 contents might not be written to this file.)
176 REVISION_WRITER is a GitRevisionWriter that is used to output
177 either the content of revisions or a mark that was previously used
180 AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
181 CVS author names to git full name and email address. All of the
182 contents should either be Unicode strings or 8-bit strings encoded
185 TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag fixup
186 branch, it should be pseudo-merged (ancestry linked but no content changes)
187 back into its source branch, to dispose of the open head.
190 DVCSOutputOption
.__init
__(self
)
191 self
.dump_filename
= dump_filename
192 self
.revision_writer
= revision_writer
194 self
.author_transforms
= self
.normalize_author_transforms(author_transforms
)
196 self
.tie_tag_fixup_branches
= tie_tag_fixup_branches
198 self
._mark
_generator
= KeyGenerator(GitOutputOption
._first
_commit
_mark
)
def register_artifacts(self, which_pass):
    """Register artifacts needed during WHICH_PASS.

    Registration is done first through DVCSOutputOption and then
    through the configured revision writer.
    """
    DVCSOutputOption.register_artifacts(self, which_pass)
    writer = self.revision_writer
    writer.register_artifacts(which_pass)
204 def check_symbols(self
, symbol_map
):
205 # FIXME: What constraints does git impose on symbols?
208 def setup(self
, svn_rev_count
):
209 DVCSOutputOption
.setup(self
, svn_rev_count
)
210 self
.f
= open(self
.dump_filename
, 'wb')
212 # The youngest revnum that has been committed so far:
215 # A map {lod : [(revnum, mark)]} giving each of the revision
216 # numbers in which there was a commit to lod, and the mark active
217 # at the end of the revnum.
220 self
.revision_writer
.start(self
._mirror
, self
.f
)
222 def _create_commit_mark(self
, lod
, revnum
):
223 mark
= self
._mark
_generator
.gen_id()
224 self
._set
_lod
_mark
(lod
, revnum
, mark
)
227 def _set_lod_mark(self
, lod
, revnum
, mark
):
228 """Record MARK as the status of LOD for REVNUM.
230 If there is already an entry for REVNUM, overwrite it. If not,
231 append a new entry to the self._marks list for LOD."""
233 assert revnum
>= self
._youngest
234 entry
= (revnum
, mark
)
236 modifications
= self
._marks
[lod
]
238 # This LOD hasn't appeared before; create a new list and add the
240 self
._marks
[lod
] = [entry
]
242 # A record exists, so it necessarily has at least one element:
243 if modifications
[-1][0] == revnum
:
244 modifications
[-1] = entry
246 modifications
.append(entry
)
247 self
._youngest
= revnum
249 def _get_author(self
, svn_commit
):
250 """Return the author to be used for SVN_COMMIT.
252 Return the author as a UTF-8 string in the form needed by git fast-import;
253 that is, 'name <email>'."""
255 cvs_author
= svn_commit
.get_author()
256 return self
._map
_author
(cvs_author
)
258 def _map_author(self
, cvs_author
):
259 return self
.author_transforms
.get(cvs_author
, "%s <>" % (cvs_author
,))
262 def _get_log_msg(svn_commit
):
263 return svn_commit
.get_log_msg()
def process_initial_project_commit(self, svn_commit):
    """Register SVN_COMMIT with the mirror as an empty commit.

    A mirror commit is opened for SVN_COMMIT's revnum and closed
    again right away; this method itself writes nothing to the dump
    file."""
    mirror = self._mirror
    mirror.start_commit(svn_commit.revnum)
    mirror.end_commit()
269 def process_primary_commit(self
, svn_commit
):
270 author
= self
._get
_author
(svn_commit
)
271 log_msg
= self
._get
_log
_msg
(svn_commit
)
274 for cvs_rev
in svn_commit
.get_cvs_items():
275 lods
.add(cvs_rev
.lod
)
277 raise InternalError('Commit affects %d LODs' % (len(lods
),))
280 self
._mirror
.start_commit(svn_commit
.revnum
)
281 if isinstance(lod
, Trunk
):
282 # FIXME: is this correct?:
283 self
.f
.write('commit refs/heads/master\n')
285 self
.f
.write('commit refs/heads/%s\n' % (lod
.name
,))
288 % (self
._create
_commit
_mark
(lod
, svn_commit
.revnum
),)
291 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
293 self
.f
.write('data %d\n' % (len(log_msg
),))
294 self
.f
.write('%s\n' % (log_msg
,))
295 for cvs_rev
in svn_commit
.get_cvs_items():
296 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=False)
299 self
._mirror
.end_commit()
301 def process_post_commit(self
, svn_commit
):
302 author
= self
._get
_author
(svn_commit
)
303 log_msg
= self
._get
_log
_msg
(svn_commit
)
306 for cvs_rev
in svn_commit
.cvs_revs
:
307 source_lods
.add(cvs_rev
.lod
)
308 if len(source_lods
) != 1:
309 raise InternalError('Commit is from %d LODs' % (len(source_lods
),))
310 source_lod
= source_lods
.pop()
312 self
._mirror
.start_commit(svn_commit
.revnum
)
313 # FIXME: is this correct?:
314 self
.f
.write('commit refs/heads/master\n')
317 % (self
._create
_commit
_mark
(None, svn_commit
.revnum
),)
320 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
322 self
.f
.write('data %d\n' % (len(log_msg
),))
323 self
.f
.write('%s\n' % (log_msg
,))
326 % (self
._get
_source
_mark
(source_lod
, svn_commit
.revnum
),)
328 for cvs_rev
in svn_commit
.cvs_revs
:
329 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=True)
332 self
._mirror
.end_commit()
334 def _get_source_mark(self
, source_lod
, revnum
):
335 """Return the mark active on SOURCE_LOD at the end of REVNUM."""
337 modifications
= self
._marks
[source_lod
]
338 i
= bisect
.bisect_left(modifications
, (revnum
+ 1,)) - 1
339 (revnum
, mark
) = modifications
[i
]
342 def describe_lod_to_user(self
, lod
):
343 """This needs to make sense to users of the fastimported result."""
344 if isinstance(lod
, Trunk
):
349 def _describe_commit(self
, svn_commit
, lod
):
350 author
= self
._map
_author
(svn_commit
.get_author())
351 if author
.endswith(" <>"):
353 date
= time
.strftime("%Y-%m-%d %H:%M:%S UTC", time
.gmtime(svn_commit
.date
))
354 log_msg
= svn_commit
.get_log_msg()
355 if log_msg
.find('\n') != -1:
356 log_msg
= log_msg
[:log_msg
.index('\n')]
357 return "%s %s %s '%s'" % (
358 self
.describe_lod_to_user(lod
), date
, author
, log_msg
,)
360 def _process_symbol_commit(
361 self
, svn_commit
, git_branch
, source_groups
, mark
363 author
= self
._get
_author
(svn_commit
)
364 log_msg
= self
._get
_log
_msg
(svn_commit
)
366 # Get the primary parent
367 p_source_lod
, p_source_revnum
, p_cvs_symbols
= source_groups
[0]
369 p_source_node
= self
._mirror
.get_old_lod_directory(p_source_lod
, p_source_revnum
)
371 raise InternalError('Source %r does not exist' % (p_source_lod
,))
372 cvs_files_to_delete
= set(self
._get
_all
_files
(p_source_node
))
374 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
:
375 for cvs_symbol
in cvs_symbols
:
376 cvs_files_to_delete
.discard(cvs_symbol
.cvs_file
)
378 # Write a trailer to the log message which describes the cherrypicks that
379 # make up this symbol creation.
381 log_msg
+= "\nSprout from %s" % (
382 self
._describe
_commit
(Ctx()._persistence
_manager
.get_svn_commit(p_source_revnum
),
384 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
[1:]:
385 log_msg
+= "\nCherrypick from %s:" % (
386 self
._describe
_commit
(Ctx()._persistence
_manager
.get_svn_commit(source_revnum
),
388 for cvs_symbol
in cvs_symbols
:
389 log_msg
+= "\n %s" % (cvs_symbol
.cvs_file
.cvs_path
,)
390 if len(cvs_files_to_delete
):
391 log_msg
+= "\nDelete:"
392 for cvs_file
in sorted(cvs_files_to_delete
):
393 log_msg
+= "\n %s" % (cvs_file
.cvs_path
,)
395 self
.f
.write('commit %s\n' % (git_branch
,))
396 self
.f
.write('mark :%d\n' % (mark
,))
397 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
398 self
.f
.write('data %d\n' % (len(log_msg
),))
399 self
.f
.write('%s\n' % (log_msg
,))
401 # Only record actual DVCS ancestry for the primary sprout parent,
402 # all the rest are effectively cherrypicks.
405 % (self
._get
_source
_mark
(p_source_lod
, p_source_revnum
),)
408 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
:
409 for cvs_symbol
in cvs_symbols
:
410 self
.revision_writer
.branch_file(cvs_symbol
)
412 for cvs_file
in cvs_files_to_delete
:
413 self
.f
.write('D %s\n' % (cvs_file
.cvs_path
,))
417 def process_branch_commit(self
, svn_commit
):
418 self
._mirror
.start_commit(svn_commit
.revnum
)
420 source_groups
= list(self
._get
_source
_groups
(svn_commit
))
421 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
422 (source_lod
, source_revnum
, cvs_symbols
) = source_groups
[0]
424 '%s will be created via a simple copy from %s:r%d'
425 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
427 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
428 self
._set
_symbol
(svn_commit
.symbol
, mark
)
429 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
430 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
433 '%s will be created via fixup commit(s)' % (svn_commit
.symbol
,)
435 self
._process
_symbol
_commit
(
436 svn_commit
, 'refs/heads/%s' % (svn_commit
.symbol
.name
,),
438 self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
),
441 self
._mirror
.end_commit()
443 def _set_symbol(self
, symbol
, mark
):
444 if isinstance(symbol
, Branch
):
446 elif isinstance(symbol
, Tag
):
449 raise InternalError()
450 self
.f
.write('reset refs/%s/%s\n' % (category
, symbol
.name
,))
451 self
.f
.write('from :%d\n' % (mark
,))
def get_tag_fixup_branch_name(self, svn_commit):
    """Return the git ref to use for "tag fixup branches".

    The git-fast-import documentation suggests 'TAG_FIXUP' (outside
    of the refs/heads namespace), but that is currently broken.  So
    a name under refs/heads is used instead; it contains a '.', which
    is not allowed in CVS symbols, to avoid conflicts (although a
    conflict could still result if the user requests symbol
    transformations).  SVN_COMMIT is not consulted: the same name is
    returned for every commit."""
    return 'refs/heads/TAG.FIXUP'
462 def process_tag_commit(self
, svn_commit
):
463 # FIXME: For now we create a fixup branch with the same name as
464 # the tag, then the tag. We never delete the fixup branch.
465 self
._mirror
.start_commit(svn_commit
.revnum
)
467 source_groups
= list(self
._get
_source
_groups
(svn_commit
))
468 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
469 (source_lod
, source_revnum
, cvs_symbols
) = source_groups
[0]
471 '%s will be created via a simple copy from %s:r%d'
472 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
474 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
475 self
._set
_symbol
(svn_commit
.symbol
, mark
)
476 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
477 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
480 '%s will be created via a fixup branch' % (svn_commit
.symbol
,)
483 fixup_branch_name
= self
.get_tag_fixup_branch_name(svn_commit
)
485 # Create the fixup branch (which might involve making more than
487 mark
= self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
)
488 self
._process
_symbol
_commit
(
489 svn_commit
, fixup_branch_name
, source_groups
, mark
492 # Store the mark of the last commit to the fixup branch as the
494 self
._set
_symbol
(svn_commit
.symbol
, mark
)
495 self
.f
.write('reset %s\n' % (fixup_branch_name
,))
498 if self
.tie_tag_fixup_branches
:
499 source_lod
= source_groups
[0][0]
500 source_lod_git_branch
= 'refs/heads/%s' % (getattr(source_lod
, 'name', 'master'),)
502 mark2
= self
._create
_commit
_mark
(source_lod
, svn_commit
.revnum
)
503 author
= self
._map
_author
(Ctx().username
)
504 log_msg
= self
._get
_log
_msg
_for
_ancestry
_tie
(svn_commit
)
506 self
.f
.write('commit %s\n' % (source_lod_git_branch
,))
507 self
.f
.write('mark :%d\n' % (mark2
,))
508 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
509 self
.f
.write('data %d\n' % (len(log_msg
),))
510 self
.f
.write('%s\n' % (log_msg
,))
519 self
._mirror
.end_commit()
521 def _get_log_msg_for_ancestry_tie(self
, svn_commit
):
522 return Ctx().text_wrapper
.fill(
523 Ctx().tie_tag_ancestry_message
% {
524 'symbol_name' : svn_commit
.symbol
.name
,
529 DVCSOutputOption
.cleanup(self
)
530 self
.revision_writer
.finish()