1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
28 from cvs2svn_lib
.common
import InternalError
29 from cvs2svn_lib
.log
import Log
30 from cvs2svn_lib
.context
import Ctx
31 from cvs2svn_lib
.symbol
import Trunk
32 from cvs2svn_lib
.symbol
import Branch
33 from cvs2svn_lib
.symbol
import Tag
34 from cvs2svn_lib
.cvs_item
import CVSSymbol
35 from cvs2svn_lib
.dvcs_common
import DVCSOutputOption
36 from cvs2svn_lib
.dvcs_common
import MirrorUpdater
37 from cvs2svn_lib
.key_generator
import KeyGenerator
class ExpectedDirectoryError(Exception):
  """Error: a directory was expected, but a file was found instead."""
class ExpectedFileError(Exception):
  """Error: a file was expected, but a directory was found instead."""
52 class GitRevisionWriter(MirrorUpdater
):
54 def start(self
, mirror
, f
):
55 super(GitRevisionWriter
, self
).start(mirror
)
def _modify_file(self, cvs_item, post_commit):
  """Hook called with CVS_ITEM and POST_COMMIT by the file handlers.

  add_file(), modify_file() and branch_file() all delegate to this
  method.  This version is only a placeholder: it unconditionally
  raises NotImplementedError."""

  raise NotImplementedError()
def add_file(self, cvs_rev, post_commit):
  """Handle the addition of CVS_REV.

  The parent class's add_file() runs first; afterwards CVS_REV and
  POST_COMMIT are handed to self._modify_file()."""

  parent = super(GitRevisionWriter, self)
  parent.add_file(cvs_rev, post_commit)
  self._modify_file(cvs_rev, post_commit)
def modify_file(self, cvs_rev, post_commit):
  """Handle a modification of CVS_REV.

  The parent class's modify_file() runs first; afterwards CVS_REV
  and POST_COMMIT are handed to self._modify_file()."""

  parent = super(GitRevisionWriter, self)
  parent.modify_file(cvs_rev, post_commit)
  self._modify_file(cvs_rev, post_commit)
def delete_file(self, cvs_rev, post_commit):
  """Handle the deletion of CVS_REV.

  The parent class's delete_file() runs first; then a 'D <path>'
  line for CVS_REV's file is written to self.f."""

  super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit)
  deletion = 'D %s\n' % (cvs_rev.cvs_file.cvs_path,)
  self.f.write(deletion)
def branch_file(self, cvs_symbol):
  """Handle the branching of a file described by CVS_SYMBOL.

  The parent class's branch_file() runs first; afterwards CVS_SYMBOL
  is handed to self._modify_file() with post_commit=False."""

  parent = super(GitRevisionWriter, self)
  parent.branch_file(cvs_symbol)
  self._modify_file(cvs_symbol, post_commit=False)
78 super(GitRevisionWriter
, self
).finish()
82 class GitRevisionMarkWriter(GitRevisionWriter
):
83 def _modify_file(self
, cvs_item
, post_commit
):
84 if cvs_item
.cvs_file
.executable
:
91 % (mode
, cvs_item
.revision_reader_token
,
92 cvs_item
.cvs_file
.cvs_path
,)
96 class GitRevisionInlineWriter(GitRevisionWriter
):
def __init__(self, revision_reader):
  """Store REVISION_READER as self.revision_reader.

  The other methods of this class call register_artifacts(),
  start(), get_content() and finish() on the stored object."""

  self.revision_reader = revision_reader
def register_artifacts(self, which_pass):
  """Register artifacts for WHICH_PASS.

  Calls GitRevisionWriter.register_artifacts() first, then forwards
  WHICH_PASS to the revision reader's register_artifacts()."""

  GitRevisionWriter.register_artifacts(self, which_pass)
  reader = self.revision_reader
  reader.register_artifacts(which_pass)
def start(self, mirror, f):
  """Start this writer with MIRROR and F, then start the reader.

  MIRROR and F are passed on unchanged to GitRevisionWriter.start();
  after that self.revision_reader.start() is called."""

  GitRevisionWriter.start(self, mirror, f)
  reader = self.revision_reader
  reader.start()
108 def _modify_file(self
, cvs_item
, post_commit
):
109 if cvs_item
.cvs_file
.executable
:
116 % (mode
, cvs_item
.cvs_file
.cvs_path
,)
119 if isinstance(cvs_item
, CVSSymbol
):
120 cvs_rev
= cvs_item
.get_cvs_revision_source(Ctx()._cvs
_items
_db
)
124 # FIXME: We have to decide what to do about keyword substitution
125 # and eol_style here:
126 fulltext
= self
.revision_reader
.get_content(cvs_rev
)
128 self
.f
.write('data %d\n' % (len(fulltext
),))
129 self
.f
.write(fulltext
)
133 GitRevisionWriter
.finish(self
)
134 self
.revision_reader
.finish()
137 class GitOutputOption(DVCSOutputOption
):
138 """An OutputOption that outputs to a git-fast-import formatted file.
142 dump_filename -- (string) the name of the file to which the
143 git-fast-import commands for defining revisions will be
146 author_transforms -- a map from CVS author names to git full name
147 and email address. See
148 DVCSOutputOption.normalize_author_transforms() for information
149 about the form of this parameter.
155 # The first mark number used for git-fast-import commit marks. This
156 # value needs to be large to avoid conflicts with blob marks.
157 _first_commit_mark
= 1000000000
160 self
, dump_filename
, revision_writer
,
161 author_transforms
=None,
162 tie_tag_fixup_branches
=False,
166 DUMP_FILENAME is the name of the file to which the git-fast-import
167 commands for defining revisions should be written. (Please note
168 that depending on the style of revision writer, the actual file
169 contents might not be written to this file.)
171 REVISION_WRITER is a GitRevisionWriter that is used to output
172 either the content of revisions or a mark that was previously used
175 AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
176 CVS author names to git full name and email address. All of the
177 contents should either be Unicode strings or 8-bit strings encoded
180 TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
181 fixup branch, it should be pseudo-merged (ancestry linked but no
182 content changes) back into its source branch, to dispose of the
186 DVCSOutputOption
.__init
__(self
)
187 self
.dump_filename
= dump_filename
188 self
.revision_writer
= revision_writer
190 self
.author_transforms
= self
.normalize_author_transforms(
194 self
.tie_tag_fixup_branches
= tie_tag_fixup_branches
196 self
._mark
_generator
= KeyGenerator(GitOutputOption
._first
_commit
_mark
)
def register_artifacts(self, which_pass):
  """Register artifacts for WHICH_PASS.

  Calls DVCSOutputOption.register_artifacts() first, then forwards
  WHICH_PASS to the revision writer's register_artifacts()."""

  DVCSOutputOption.register_artifacts(self, which_pass)
  writer = self.revision_writer
  writer.register_artifacts(which_pass)
202 def check_symbols(self
, symbol_map
):
203 # FIXME: What constraints does git impose on symbols?
206 def setup(self
, svn_rev_count
):
207 DVCSOutputOption
.setup(self
, svn_rev_count
)
208 self
.f
= open(self
.dump_filename
, 'wb')
210 # The youngest revnum that has been committed so far:
213 # A map {lod : [(revnum, mark)]} giving each of the revision
214 # numbers in which there was a commit to lod, and the mark active
215 # at the end of the revnum.
218 self
.revision_writer
.start(self
._mirror
, self
.f
)
220 def _create_commit_mark(self
, lod
, revnum
):
221 mark
= self
._mark
_generator
.gen_id()
222 self
._set
_lod
_mark
(lod
, revnum
, mark
)
225 def _set_lod_mark(self
, lod
, revnum
, mark
):
226 """Record MARK as the status of LOD for REVNUM.
228 If there is already an entry for REVNUM, overwrite it. If not,
229 append a new entry to the self._marks list for LOD."""
231 assert revnum
>= self
._youngest
232 entry
= (revnum
, mark
)
234 modifications
= self
._marks
[lod
]
236 # This LOD hasn't appeared before; create a new list and add the
238 self
._marks
[lod
] = [entry
]
240 # A record exists, so it necessarily has at least one element:
241 if modifications
[-1][0] == revnum
:
242 modifications
[-1] = entry
244 modifications
.append(entry
)
245 self
._youngest
= revnum
def _get_author(self, svn_commit):
  """Return the author string to use for SVN_COMMIT.

  The CVS author reported by SVN_COMMIT.get_author() is passed
  through self._map_author().  The result is meant to be a UTF-8
  string in the 'name <email>' form that git fast-import needs."""

  return self._map_author(svn_commit.get_author())
def _map_author(self, cvs_author):
  """Return the self.author_transforms entry for CVS_AUTHOR.

  If there is no entry for CVS_AUTHOR, fall back to the string
  'CVS_AUTHOR <>' (the CVS name followed by an empty email part)."""

  fallback = "%s <>" % (cvs_author,)
  return self.author_transforms.get(cvs_author, fallback)
260 def _get_log_msg(svn_commit
):
261 return svn_commit
.get_log_msg()
def process_initial_project_commit(self, svn_commit):
  """Record SVN_COMMIT in the mirror only.

  A mirror commit is opened for SVN_COMMIT.revnum and closed again
  straight away; this method writes nothing to the dump file."""

  mirror = self._mirror
  mirror.start_commit(svn_commit.revnum)
  mirror.end_commit()
267 def process_primary_commit(self
, svn_commit
):
268 author
= self
._get
_author
(svn_commit
)
269 log_msg
= self
._get
_log
_msg
(svn_commit
)
272 for cvs_rev
in svn_commit
.get_cvs_items():
273 lods
.add(cvs_rev
.lod
)
275 raise InternalError('Commit affects %d LODs' % (len(lods
),))
278 self
._mirror
.start_commit(svn_commit
.revnum
)
279 if isinstance(lod
, Trunk
):
280 # FIXME: is this correct?:
281 self
.f
.write('commit refs/heads/master\n')
283 self
.f
.write('commit refs/heads/%s\n' % (lod
.name
,))
286 % (self
._create
_commit
_mark
(lod
, svn_commit
.revnum
),)
289 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
291 self
.f
.write('data %d\n' % (len(log_msg
),))
292 self
.f
.write('%s\n' % (log_msg
,))
293 for cvs_rev
in svn_commit
.get_cvs_items():
294 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=False)
297 self
._mirror
.end_commit()
299 def process_post_commit(self
, svn_commit
):
300 author
= self
._get
_author
(svn_commit
)
301 log_msg
= self
._get
_log
_msg
(svn_commit
)
304 for cvs_rev
in svn_commit
.cvs_revs
:
305 source_lods
.add(cvs_rev
.lod
)
306 if len(source_lods
) != 1:
307 raise InternalError('Commit is from %d LODs' % (len(source_lods
),))
308 source_lod
= source_lods
.pop()
310 self
._mirror
.start_commit(svn_commit
.revnum
)
311 # FIXME: is this correct?:
312 self
.f
.write('commit refs/heads/master\n')
315 % (self
._create
_commit
_mark
(None, svn_commit
.revnum
),)
318 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
320 self
.f
.write('data %d\n' % (len(log_msg
),))
321 self
.f
.write('%s\n' % (log_msg
,))
324 % (self
._get
_source
_mark
(source_lod
, svn_commit
.revnum
),)
326 for cvs_rev
in svn_commit
.cvs_revs
:
327 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=True)
330 self
._mirror
.end_commit()
332 def _get_source_mark(self
, source_lod
, revnum
):
333 """Return the mark active on SOURCE_LOD at the end of REVNUM."""
335 modifications
= self
._marks
[source_lod
]
336 i
= bisect
.bisect_left(modifications
, (revnum
+ 1,)) - 1
337 (revnum
, mark
) = modifications
[i
]
340 def describe_lod_to_user(self
, lod
):
341 """This needs to make sense to users of the fastimported result."""
342 if isinstance(lod
, Trunk
):
347 def _describe_commit(self
, svn_commit
, lod
):
348 author
= self
._map
_author
(svn_commit
.get_author())
349 if author
.endswith(" <>"):
351 date
= time
.strftime(
352 "%Y-%m-%d %H:%M:%S UTC", time
.gmtime(svn_commit
.date
)
354 log_msg
= svn_commit
.get_log_msg()
355 if log_msg
.find('\n') != -1:
356 log_msg
= log_msg
[:log_msg
.index('\n')]
357 return "%s %s %s '%s'" % (
358 self
.describe_lod_to_user(lod
), date
, author
, log_msg
,)
360 def _process_symbol_commit(
361 self
, svn_commit
, git_branch
, source_groups
, mark
363 author
= self
._get
_author
(svn_commit
)
364 log_msg
= self
._get
_log
_msg
(svn_commit
)
366 # There are two distinct cases we need to care for here:
367 # 1. initial creation of a LOD
368 # 2. fixup of an existing LOD to include more files, because the LOD in
369 # CVS was created piecemeal over time, with intervening commits
371 # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
372 # might be technically more correct (though _get_lod_history is currently
373 # underscore-private)
374 is_initial_lod_creation
= svn_commit
.symbol
not in self
._marks
376 if is_initial_lod_creation
:
377 # Get the primary parent
378 p_source_lod
, p_source_revnum
, p_cvs_symbols
= source_groups
[0]
380 p_source_node
= self
._mirror
.get_old_lod_directory(
381 p_source_lod
, p_source_revnum
384 raise InternalError('Source %r does not exist' % (p_source_lod
,))
385 cvs_files_to_delete
= set(self
._get
_all
_files
(p_source_node
))
387 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
:
388 for cvs_symbol
in cvs_symbols
:
389 cvs_files_to_delete
.discard(cvs_symbol
.cvs_file
)
391 # Write a trailer to the log message which describes the cherrypicks that
392 # make up this symbol creation.
394 if is_initial_lod_creation
:
395 log_msg
+= "\nSprout from %s" % (
396 self
._describe
_commit
(
397 Ctx()._persistence
_manager
.get_svn_commit(p_source_revnum
),
401 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
[(is_initial_lod_creation
and 1 or 0):]:
402 log_msg
+= "\nCherrypick from %s:" % (
403 self
._describe
_commit
(
404 Ctx()._persistence
_manager
.get_svn_commit(source_revnum
),
408 for cvs_symbol
in cvs_symbols
:
409 log_msg
+= "\n %s" % (cvs_symbol
.cvs_file
.cvs_path
,)
410 if is_initial_lod_creation
:
411 if len(cvs_files_to_delete
):
412 log_msg
+= "\nDelete:"
413 for cvs_file
in sorted(cvs_files_to_delete
):
414 log_msg
+= "\n %s" % (cvs_file
.cvs_path
,)
416 self
.f
.write('commit %s\n' % (git_branch
,))
417 self
.f
.write('mark :%d\n' % (mark
,))
418 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
419 self
.f
.write('data %d\n' % (len(log_msg
),))
420 self
.f
.write('%s\n' % (log_msg
,))
422 # Only record actual DVCS ancestry for the primary sprout parent,
423 # all the rest are effectively cherrypicks.
424 if is_initial_lod_creation
:
427 % (self
._get
_source
_mark
(p_source_lod
, p_source_revnum
),)
430 for (source_lod
, source_revnum
, cvs_symbols
,) in source_groups
:
431 for cvs_symbol
in cvs_symbols
:
432 self
.revision_writer
.branch_file(cvs_symbol
)
434 if is_initial_lod_creation
:
435 for cvs_file
in cvs_files_to_delete
:
436 self
.f
.write('D %s\n' % (cvs_file
.cvs_path
,))
440 def process_branch_commit(self
, svn_commit
):
441 self
._mirror
.start_commit(svn_commit
.revnum
)
443 source_groups
= list(self
._get
_source
_groups
(svn_commit
))
444 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
445 (source_lod
, source_revnum
, cvs_symbols
) = source_groups
[0]
447 '%s will be created via a simple copy from %s:r%d'
448 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
450 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
451 self
._set
_symbol
(svn_commit
.symbol
, mark
)
452 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
453 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
456 '%s will be created via fixup commit(s)' % (svn_commit
.symbol
,)
458 self
._process
_symbol
_commit
(
459 svn_commit
, 'refs/heads/%s' % (svn_commit
.symbol
.name
,),
461 self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
),
464 self
._mirror
.end_commit()
466 def _set_symbol(self
, symbol
, mark
):
467 if isinstance(symbol
, Branch
):
469 elif isinstance(symbol
, Tag
):
472 raise InternalError()
473 self
.f
.write('reset refs/%s/%s\n' % (category
, symbol
.name
,))
474 self
.f
.write('from :%d\n' % (mark
,))
def get_tag_fixup_branch_name(self, svn_commit):
  """Return the git ref name used for "tag fixup branches".

  SVN_COMMIT is not consulted; the same name is returned for every
  commit."""

  # The git-fast-import documentation recommends 'TAG_FIXUP', outside
  # of the refs/heads namespace, but that is currently broken.  So we
  # use a name that contains '.', a character not allowed in CVS
  # symbols, to avoid conflicts.  (A conflict is still possible if
  # the user requests symbol transformations.)
  return 'refs/heads/TAG.FIXUP'
485 def process_tag_commit(self
, svn_commit
):
486 # FIXME: For now we create a fixup branch with the same name as
487 # the tag, then the tag. We never delete the fixup branch.
488 self
._mirror
.start_commit(svn_commit
.revnum
)
490 source_groups
= list(self
._get
_source
_groups
(svn_commit
))
491 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
492 (source_lod
, source_revnum
, cvs_symbols
) = source_groups
[0]
494 '%s will be created via a simple copy from %s:r%d'
495 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
497 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
498 self
._set
_symbol
(svn_commit
.symbol
, mark
)
499 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
500 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
503 '%s will be created via a fixup branch' % (svn_commit
.symbol
,)
506 fixup_branch_name
= self
.get_tag_fixup_branch_name(svn_commit
)
508 # Create the fixup branch (which might involve making more than
510 mark
= self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
)
511 self
._process
_symbol
_commit
(
512 svn_commit
, fixup_branch_name
, source_groups
, mark
515 # Store the mark of the last commit to the fixup branch as the
517 self
._set
_symbol
(svn_commit
.symbol
, mark
)
518 self
.f
.write('reset %s\n' % (fixup_branch_name
,))
521 if self
.tie_tag_fixup_branches
:
522 source_lod
= source_groups
[0][0]
523 source_lod_git_branch
= \
524 'refs/heads/%s' % (getattr(source_lod
, 'name', 'master'),)
526 mark2
= self
._create
_commit
_mark
(source_lod
, svn_commit
.revnum
)
527 author
= self
._map
_author
(Ctx().username
)
528 log_msg
= self
._get
_log
_msg
_for
_ancestry
_tie
(svn_commit
)
530 self
.f
.write('commit %s\n' % (source_lod_git_branch
,))
531 self
.f
.write('mark :%d\n' % (mark2
,))
532 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
533 self
.f
.write('data %d\n' % (len(log_msg
),))
534 self
.f
.write('%s\n' % (log_msg
,))
543 self
._mirror
.end_commit()
545 def _get_log_msg_for_ancestry_tie(self
, svn_commit
):
546 return Ctx().text_wrapper
.fill(
547 Ctx().tie_tag_ancestry_message
% {
548 'symbol_name' : svn_commit
.symbol
.name
,
553 DVCSOutputOption
.cleanup(self
)
554 self
.revision_writer
.finish()