1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
28 from cvs2svn_lib
.common
import InternalError
29 from cvs2svn_lib
.common
import FatalError
30 from cvs2svn_lib
.log
import Log
31 from cvs2svn_lib
.context
import Ctx
32 from cvs2svn_lib
.symbol
import Trunk
33 from cvs2svn_lib
.symbol
import Branch
34 from cvs2svn_lib
.symbol
import Tag
35 from cvs2svn_lib
.cvs_item
import CVSSymbol
36 from cvs2svn_lib
.dvcs_common
import DVCSOutputOption
37 from cvs2svn_lib
.dvcs_common
import MirrorUpdater
38 from cvs2svn_lib
.key_generator
import KeyGenerator
class ExpectedDirectoryError(Exception):
    """Raised when a directory was expected but a file was found instead."""
class ExpectedFileError(Exception):
    """Raised when a file was expected but a directory was found instead."""
53 class GitRevisionWriter(MirrorUpdater
):
55 def start(self
, mirror
, f
):
56 super(GitRevisionWriter
, self
).start(mirror
)
59 def _modify_file(self
, cvs_item
, post_commit
):
60 raise NotImplementedError()
def add_file(self, cvs_rev, post_commit):
    """Handle the addition of the file revision CVS_REV.

    The parent class processes the addition first; CVS_REV is then
    handed to self._modify_file() together with the unchanged
    POST_COMMIT flag."""

    parent = super(GitRevisionWriter, self)
    parent.add_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)
def modify_file(self, cvs_rev, post_commit):
    """Handle a modification of the file revision CVS_REV.

    The parent class processes the modification first; CVS_REV is then
    handed to self._modify_file() together with the unchanged
    POST_COMMIT flag."""

    parent = super(GitRevisionWriter, self)
    parent.modify_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)
def delete_file(self, cvs_rev, post_commit):
    """Handle the deletion of the file revision CVS_REV.

    After the parent class has processed the deletion, a 'D <path>'
    line naming the file's cvs_path is written to self.f."""

    super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit)
    output = self.f
    deleted_path = cvs_rev.cvs_file.cvs_path
    output.write('D %s\n' % (deleted_path,))
def branch_file(self, cvs_symbol):
    """Handle the copying of a file onto the symbol CVS_SYMBOL.

    The parent class processes the branching first; CVS_SYMBOL is then
    handed to self._modify_file() with post_commit set to False."""

    parent = super(GitRevisionWriter, self)
    parent.branch_file(cvs_symbol)
    self._modify_file(cvs_symbol, post_commit=False)
79 super(GitRevisionWriter
, self
).finish()
83 class GitRevisionMarkWriter(GitRevisionWriter
):
84 def _modify_file(self
, cvs_item
, post_commit
):
85 if cvs_item
.cvs_file
.executable
:
92 % (mode
, cvs_item
.revision_recorder_token
,
93 cvs_item
.cvs_file
.cvs_path
,)
97 class GitRevisionInlineWriter(GitRevisionWriter
):
98 def __init__(self
, revision_reader
):
99 self
.revision_reader
= revision_reader
def register_artifacts(self, which_pass):
    """Register the artifacts needed during WHICH_PASS.

    Registration is performed first by the GitRevisionWriter
    implementation and then by self.revision_reader."""

    GitRevisionWriter.register_artifacts(self, which_pass)
    reader = self.revision_reader
    reader.register_artifacts(which_pass)
def start(self, mirror, f):
    """Begin writing revisions.

    MIRROR and F are passed through to GitRevisionWriter.start();
    afterwards self.revision_reader is started as well."""

    GitRevisionWriter.start(self, mirror, f)
    reader = self.revision_reader
    reader.start()
109 def _modify_file(self
, cvs_item
, post_commit
):
110 if cvs_item
.cvs_file
.executable
:
117 % (mode
, cvs_item
.cvs_file
.cvs_path
,)
120 if isinstance(cvs_item
, CVSSymbol
):
121 cvs_rev
= cvs_item
.get_cvs_revision_source(Ctx()._cvs
_items
_db
)
125 # FIXME: We have to decide what to do about keyword substitution
126 # and eol_style here:
127 stream
= self
.revision_reader
.get_content_stream(
128 cvs_rev
, suppress_keyword_substitution
=False
130 fulltext
= stream
.read()
133 self
.f
.write('data %d\n' % (len(fulltext
),))
134 self
.f
.write(fulltext
)
138 GitRevisionWriter
.finish(self
)
139 self
.revision_reader
.finish()
142 class GitOutputOption(DVCSOutputOption
):
143 """An OutputOption that outputs to a git-fast-import formatted file.
147 dump_filename -- (string) the name of the file to which the
148 git-fast-import commands for defining revisions will be
151 author_transforms -- a map {cvsauthor : (fullname, email)} from
152 CVS author names to git full name and email address. All of
153 the contents are 8-bit strings encoded as UTF-8.
159 # The first mark number used for git-fast-import commit marks. This
160 # value needs to be large to avoid conflicts with blob marks.
161 _first_commit_mark
= 1000000000
164 self
, dump_filename
, revision_writer
,
165 author_transforms
=None,
166 tie_tag_fixup_branches
=False,
170 DUMP_FILENAME is the name of the file to which the git-fast-import
171 commands for defining revisions should be written. (Please note
172 that depending on the style of revision writer, the actual file
173 contents might not be written to this file.)
175 REVISION_WRITER is a GitRevisionWriter that is used to output
176 either the content of revisions or a mark that was previously used
179 AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
180 CVS author names to git full name and email address. All of the
181 contents should either be Unicode strings or 8-bit strings encoded
184 TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag fixup
    branch, it should be pseudo-merged (ancestry linked but no content changes)
186 back into its source branch, to dispose of the open head.
189 DVCSOutputOption
.__init
__(self
)
190 self
.dump_filename
= dump_filename
191 self
.revision_writer
= revision_writer
193 self
.author_transforms
= self
.normalize_author_transforms(author_transforms
)
195 self
.tie_tag_fixup_branches
= tie_tag_fixup_branches
197 self
._mark
_generator
= KeyGenerator(GitOutputOption
._first
_commit
_mark
)
def register_artifacts(self, which_pass):
    """Register the artifacts needed during WHICH_PASS.

    Registration is performed first by the DVCSOutputOption
    implementation and then by self.revision_writer."""

    DVCSOutputOption.register_artifacts(self, which_pass)
    writer = self.revision_writer
    writer.register_artifacts(which_pass)
203 def check_symbols(self
, symbol_map
):
204 # FIXME: What constraints does git impose on symbols?
207 def setup(self
, svn_rev_count
):
208 DVCSOutputOption
.setup(self
, svn_rev_count
)
209 self
.f
= open(self
.dump_filename
, 'wb')
211 # The youngest revnum that has been committed so far:
214 # A map {lod : [(revnum, mark)]} giving each of the revision
215 # numbers in which there was a commit to lod, and the mark active
216 # at the end of the revnum.
219 self
.revision_writer
.start(self
._mirror
, self
.f
)
221 def _create_commit_mark(self
, lod
, revnum
):
222 mark
= self
._mark
_generator
.gen_id()
223 self
._set
_lod
_mark
(lod
, revnum
, mark
)
226 def _set_lod_mark(self
, lod
, revnum
, mark
):
227 """Record MARK as the status of LOD for REVNUM.
229 If there is already an entry for REVNUM, overwrite it. If not,
230 append a new entry to the self._marks list for LOD."""
232 assert revnum
>= self
._youngest
233 entry
= (revnum
, mark
)
235 modifications
= self
._marks
[lod
]
237 # This LOD hasn't appeared before; create a new list and add the
239 self
._marks
[lod
] = [entry
]
241 # A record exists, so it necessarily has at least one element:
242 if modifications
[-1][0] == revnum
:
243 modifications
[-1] = entry
245 modifications
.append(entry
)
246 self
._youngest
= revnum
248 def _get_author(self
, svn_commit
):
249 """Return the author to be used for SVN_COMMIT.
251 Return the author as a UTF-8 string in the form needed by git fast-import;
252 that is, 'name <email>'."""
254 cvs_author
= svn_commit
.get_author()
255 return self
._map
_author
(cvs_author
)
257 def _map_author(self
, cvs_author
):
258 return self
.author_transforms
.get(cvs_author
, "%s <>" % (cvs_author
,))
261 def _get_log_msg(svn_commit
):
262 return svn_commit
.get_log_msg()
def process_initial_project_commit(self, svn_commit):
    """Process the initial project commit SVN_COMMIT.

    Nothing is written to the output here: a commit for
    SVN_COMMIT.revnum is merely opened on the mirror and closed again
    straight away."""

    mirror = self._mirror
    mirror.start_commit(svn_commit.revnum)
    mirror.end_commit()
268 def process_primary_commit(self
, svn_commit
):
269 author
= self
._get
_author
(svn_commit
)
270 log_msg
= self
._get
_log
_msg
(svn_commit
)
273 for cvs_rev
in svn_commit
.get_cvs_items():
274 lods
.add(cvs_rev
.lod
)
276 raise InternalError('Commit affects %d LODs' % (len(lods
),))
279 self
._mirror
.start_commit(svn_commit
.revnum
)
280 if isinstance(lod
, Trunk
):
281 # FIXME: is this correct?:
282 self
.f
.write('commit refs/heads/master\n')
284 self
.f
.write('commit refs/heads/%s\n' % (lod
.name
,))
287 % (self
._create
_commit
_mark
(lod
, svn_commit
.revnum
),)
290 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
292 self
.f
.write('data %d\n' % (len(log_msg
),))
293 self
.f
.write('%s\n' % (log_msg
,))
294 for cvs_rev
in svn_commit
.get_cvs_items():
295 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=False)
298 self
._mirror
.end_commit()
300 def process_post_commit(self
, svn_commit
):
301 author
= self
._get
_author
(svn_commit
)
302 log_msg
= self
._get
_log
_msg
(svn_commit
)
305 for cvs_rev
in svn_commit
.cvs_revs
:
306 source_lods
.add(cvs_rev
.lod
)
307 if len(source_lods
) != 1:
308 raise InternalError('Commit is from %d LODs' % (len(source_lods
),))
309 source_lod
= source_lods
.pop()
311 self
._mirror
.start_commit(svn_commit
.revnum
)
312 # FIXME: is this correct?:
313 self
.f
.write('commit refs/heads/master\n')
316 % (self
._create
_commit
_mark
(None, svn_commit
.revnum
),)
319 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
321 self
.f
.write('data %d\n' % (len(log_msg
),))
322 self
.f
.write('%s\n' % (log_msg
,))
325 % (self
._get
_source
_mark
(source_lod
, svn_commit
.revnum
),)
327 for cvs_rev
in svn_commit
.cvs_revs
:
328 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=True)
331 self
._mirror
.end_commit()
333 def _get_source_mark(self
, source_lod
, revnum
):
334 """Return the mark active on SOURCE_LOD at the end of REVNUM."""
336 modifications
= self
._marks
[source_lod
]
337 i
= bisect
.bisect_left(modifications
, (revnum
+ 1,)) - 1
338 (revnum
, mark
) = modifications
[i
]
341 def describe_lod_to_user(self
, lod
):
342 """This needs to make sense to users of the fastimported result."""
343 if isinstance(lod
, Trunk
):
348 def _describe_commit(self
, svn_commit
, lod
):
349 author
= self
._map
_author
(svn_commit
.get_author())
350 if author
.endswith(" <>"):
352 date
= time
.strftime("%Y-%m-%d %H:%M:%S UTC", time
.gmtime(svn_commit
.date
))
353 log_msg
= svn_commit
.get_log_msg()
354 if log_msg
.find('\n') != -1:
355 log_msg
= log_msg
[:log_msg
.index('\n')]
356 return "%s %s %s '%s'" % (
357 self
.describe_lod_to_user(lod
), date
, author
, log_msg
,)
359 def _process_symbol_commit(
360 self
, svn_commit
, git_branch
, source_groups
, mark
362 author
= self
._get
_author
(svn_commit
)
363 log_msg
= self
._get
_log
_msg
(svn_commit
)
365 # Get the primary parent
366 p_source_lod
, p_source_revnum
, p_cvs_symbols
= source_groups
[0]
368 p_source_node
= self
._mirror
.get_old_lod_directory(p_source_lod
, p_source_revnum
)
370 raise InternalError('Source %r does not exist' % (p_source_lod
,))
371 cvs_files_to_delete
= set(self
._get
_all
_files
(p_source_node
))
373 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
:
374 for cvs_symbol
in cvs_symbols
:
375 cvs_files_to_delete
.discard(cvs_symbol
.cvs_file
)
377 # Write a trailer to the log message which describes the cherrypicks that
378 # make up this symbol creation.
380 log_msg
+= "\nSprout from %s" % (
381 self
._describe
_commit
(Ctx()._persistence
_manager
.get_svn_commit(p_source_revnum
),
383 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
[1:]:
384 log_msg
+= "\nCherrypick from %s:" % (
385 self
._describe
_commit
(Ctx()._persistence
_manager
.get_svn_commit(source_revnum
),
387 for cvs_symbol
in cvs_symbols
:
388 log_msg
+= "\n %s" % (cvs_symbol
.cvs_file
.cvs_path
,)
389 if len(cvs_files_to_delete
):
390 log_msg
+= "\nDelete:"
391 for cvs_file
in sorted(cvs_files_to_delete
):
392 log_msg
+= "\n %s" % (cvs_file
.cvs_path
,)
394 self
.f
.write('commit %s\n' % (git_branch
,))
395 self
.f
.write('mark :%d\n' % (mark
,))
396 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
397 self
.f
.write('data %d\n' % (len(log_msg
),))
398 self
.f
.write('%s\n' % (log_msg
,))
400 # Only record actual DVCS ancestry for the primary sprout parent,
401 # all the rest are effectively cherrypicks.
404 % (self
._get
_source
_mark
(p_source_lod
, p_source_revnum
),)
407 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
:
408 for cvs_symbol
in cvs_symbols
:
409 self
.revision_writer
.branch_file(cvs_symbol
)
411 for cvs_file
in cvs_files_to_delete
:
412 self
.f
.write('D %s\n' % (cvs_file
.cvs_path
,))
416 def process_branch_commit(self
, svn_commit
):
417 self
._mirror
.start_commit(svn_commit
.revnum
)
419 source_groups
= list(self
._get
_source
_groups
(svn_commit
))
420 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
421 (source_lod
, source_revnum
, cvs_symbols
) = source_groups
[0]
423 '%s will be created via a simple copy from %s:r%d'
424 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
426 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
427 self
._set
_symbol
(svn_commit
.symbol
, mark
)
428 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
429 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
432 '%s will be created via fixup commit(s)' % (svn_commit
.symbol
,)
434 self
._process
_symbol
_commit
(
435 svn_commit
, 'refs/heads/%s' % (svn_commit
.symbol
.name
,),
437 self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
),
440 self
._mirror
.end_commit()
442 def _set_symbol(self
, symbol
, mark
):
443 if isinstance(symbol
, Branch
):
445 elif isinstance(symbol
, Tag
):
448 raise InternalError()
449 self
.f
.write('reset refs/%s/%s\n' % (category
, symbol
.name
,))
450 self
.f
.write('from :%d\n' % (mark
,))
def get_tag_fixup_branch_name(self, svn_commit):
    """Return the branch name to use for the "tag fixup branches".

    The same name is returned regardless of SVN_COMMIT."""

    # The git-fast-import documentation suggests using 'TAG_FIXUP'
    # (outside of the refs/heads namespace), but this is currently
    # broken.  Instead, use a name containing '.', which is not allowed
    # in CVS symbols, to avoid conflicts (though of course a conflict
    # could still result if the user requests symbol transformations).
    fixup_branch_name = 'refs/heads/TAG.FIXUP'
    return fixup_branch_name
461 def process_tag_commit(self
, svn_commit
):
462 # FIXME: For now we create a fixup branch with the same name as
463 # the tag, then the tag. We never delete the fixup branch.
464 self
._mirror
.start_commit(svn_commit
.revnum
)
466 source_groups
= list(self
._get
_source
_groups
(svn_commit
))
467 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
468 (source_lod
, source_revnum
, cvs_symbols
) = source_groups
[0]
470 '%s will be created via a simple copy from %s:r%d'
471 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
473 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
474 self
._set
_symbol
(svn_commit
.symbol
, mark
)
475 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
476 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
479 '%s will be created via a fixup branch' % (svn_commit
.symbol
,)
482 fixup_branch_name
= self
.get_tag_fixup_branch_name(svn_commit
)
484 # Create the fixup branch (which might involve making more than
486 mark
= self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
)
487 self
._process
_symbol
_commit
(
488 svn_commit
, fixup_branch_name
, source_groups
, mark
491 # Store the mark of the last commit to the fixup branch as the
493 self
._set
_symbol
(svn_commit
.symbol
, mark
)
494 self
.f
.write('reset %s\n' % (fixup_branch_name
,))
497 if self
.tie_tag_fixup_branches
:
498 source_lod
= source_groups
[0][0]
499 source_lod_git_branch
= 'refs/heads/%s' % (getattr(source_lod
, 'name', 'master'),)
501 mark2
= self
._create
_commit
_mark
(source_lod
, svn_commit
.revnum
)
502 author
= self
._map
_author
(Ctx().username
)
503 log_msg
= self
._get
_log
_msg
_for
_ancestry
_tie
(svn_commit
)
505 self
.f
.write('commit %s\n' % (source_lod_git_branch
,))
506 self
.f
.write('mark :%d\n' % (mark2
,))
507 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
508 self
.f
.write('data %d\n' % (len(log_msg
),))
509 self
.f
.write('%s\n' % (log_msg
,))
518 self
._mirror
.end_commit()
520 def _get_log_msg_for_ancestry_tie(self
, svn_commit
):
521 return Ctx().text_wrapper
.fill(
522 Ctx().tie_tag_ancestry_message
% {
523 'symbol_name' : svn_commit
.symbol
.name
,
528 DVCSOutputOption
.cleanup(self
)
529 self
.revision_writer
.finish()