1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
28 from cvs2svn_lib
.common
import InternalError
29 from cvs2svn_lib
.log
import logger
30 from cvs2svn_lib
.context
import Ctx
31 from cvs2svn_lib
.symbol
import Trunk
32 from cvs2svn_lib
.symbol
import Branch
33 from cvs2svn_lib
.symbol
import Tag
34 from cvs2svn_lib
.cvs_item
import CVSSymbol
35 from cvs2svn_lib
.dvcs_common
import DVCSOutputOption
36 from cvs2svn_lib
.dvcs_common
import MirrorUpdater
37 from cvs2svn_lib
.key_generator
import KeyGenerator
40 class GitRevisionWriter(MirrorUpdater
):
42 def start(self
, mirror
, f
):
43 super(GitRevisionWriter
, self
).start(mirror
)
46 def _modify_file(self
, cvs_item
, post_commit
):
47 raise NotImplementedError()
def add_file(self, cvs_rev, post_commit):
  """Handle the addition of CVS_REV.

  First delegate to the parent class's add_file(), then call
  self._modify_file().  CVS_REV and POST_COMMIT are passed through
  unchanged to both calls."""

  parent = super(GitRevisionWriter, self)
  parent.add_file(cvs_rev, post_commit)
  self._modify_file(cvs_rev, post_commit)
def modify_file(self, cvs_rev, post_commit):
  """Handle a modification of CVS_REV.

  First delegate to the parent class's modify_file(), then call
  self._modify_file().  CVS_REV and POST_COMMIT are passed through
  unchanged to both calls."""

  parent = super(GitRevisionWriter, self)
  parent.modify_file(cvs_rev, post_commit)
  self._modify_file(cvs_rev, post_commit)
def delete_file(self, cvs_rev, post_commit):
  """Handle the deletion of CVS_REV.

  First delegate to the parent class's delete_file(), then write a
  line of the form 'D <path>' to self.f, where <path> is
  CVS_REV.cvs_file.cvs_path."""

  parent = super(GitRevisionWriter, self)
  parent.delete_file(cvs_rev, post_commit)

  emit = self.f.write
  emit('D %s\n' % (cvs_rev.cvs_file.cvs_path,))
def branch_file(self, cvs_symbol):
  """Handle the copying of a file onto a symbol, CVS_SYMBOL.

  First delegate to the parent class's branch_file(), then call
  self._modify_file() for CVS_SYMBOL, always with post_commit=False."""

  parent = super(GitRevisionWriter, self)
  parent.branch_file(cvs_symbol)
  self._modify_file(cvs_symbol, post_commit=False)
66 super(GitRevisionWriter
, self
).finish()
70 class GitRevisionMarkWriter(GitRevisionWriter
):
71 def _modify_file(self
, cvs_item
, post_commit
):
72 if cvs_item
.cvs_file
.executable
:
79 % (mode
, cvs_item
.revision_reader_token
,
80 cvs_item
.cvs_file
.cvs_path
,)
84 class GitRevisionInlineWriter(GitRevisionWriter
):
85 def __init__(self
, revision_reader
):
86 self
.revision_reader
= revision_reader
def register_artifacts(self, which_pass):
  """Register the artifacts needed during WHICH_PASS.

  GitRevisionWriter.register_artifacts() is invoked first, then the
  register_artifacts() method of self.revision_reader."""

  GitRevisionWriter.register_artifacts(self, which_pass)

  reader = self.revision_reader
  reader.register_artifacts(which_pass)
def start(self, mirror, f):
  """Start this writer, then start the revision reader.

  MIRROR and F are handed unchanged to GitRevisionWriter.start();
  afterwards self.revision_reader.start() is called without
  arguments."""

  GitRevisionWriter.start(self, mirror, f)

  reader = self.revision_reader
  reader.start()
96 def _modify_file(self
, cvs_item
, post_commit
):
97 if cvs_item
.cvs_file
.executable
:
104 % (mode
, cvs_item
.cvs_file
.cvs_path
,)
107 if isinstance(cvs_item
, CVSSymbol
):
108 cvs_rev
= cvs_item
.get_cvs_revision_source(Ctx()._cvs
_items
_db
)
112 # FIXME: We have to decide what to do about keyword substitution
113 # and eol_style here:
114 fulltext
= self
.revision_reader
.get_content(cvs_rev
)
116 self
.f
.write('data %d\n' % (len(fulltext
),))
117 self
.f
.write(fulltext
)
121 GitRevisionWriter
.finish(self
)
122 self
.revision_reader
.finish()
125 class GitOutputOption(DVCSOutputOption
):
126 """An OutputOption that outputs to a git-fast-import formatted file.
130 dump_filename -- (string) the name of the file to which the
131 git-fast-import commands for defining revisions will be
134 author_transforms -- a map from CVS author names to git full name
135 and email address. See
136 DVCSOutputOption.normalize_author_transforms() for information
137 about the form of this parameter.
143 # The first mark number used for git-fast-import commit marks. This
144 # value needs to be large to avoid conflicts with blob marks.
145 _first_commit_mark
= 1000000000
148 self
, dump_filename
, revision_writer
,
149 author_transforms
=None,
150 tie_tag_fixup_branches
=False,
154 DUMP_FILENAME is the name of the file to which the git-fast-import
155 commands for defining revisions should be written. (Please note
156 that depending on the style of revision writer, the actual file
157 contents might not be written to this file.)
159 REVISION_WRITER is a GitRevisionWriter that is used to output
160 either the content of revisions or a mark that was previously used
163 AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
164 CVS author names to git full name and email address. All of the
165 contents should either be Unicode strings or 8-bit strings encoded
168 TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
169 fixup branch, it should be pseudo-merged (ancestry linked but no
170 content changes) back into its source branch, to dispose of the
174 DVCSOutputOption
.__init
__(self
)
175 self
.dump_filename
= dump_filename
176 self
.revision_writer
= revision_writer
178 self
.author_transforms
= self
.normalize_author_transforms(
182 self
.tie_tag_fixup_branches
= tie_tag_fixup_branches
184 self
._mark
_generator
= KeyGenerator(GitOutputOption
._first
_commit
_mark
)
def register_artifacts(self, which_pass):
  """Register the artifacts needed during WHICH_PASS.

  DVCSOutputOption.register_artifacts() is invoked first, then the
  register_artifacts() method of self.revision_writer."""

  DVCSOutputOption.register_artifacts(self, which_pass)

  writer = self.revision_writer
  writer.register_artifacts(which_pass)
190 def check_symbols(self
, symbol_map
):
191 # FIXME: What constraints does git impose on symbols?
194 def setup(self
, svn_rev_count
):
195 DVCSOutputOption
.setup(self
, svn_rev_count
)
196 self
.f
= open(self
.dump_filename
, 'wb')
198 # The youngest revnum that has been committed so far:
201 # A map {lod : [(revnum, mark)]} giving each of the revision
202 # numbers in which there was a commit to lod, and the mark active
203 # at the end of the revnum.
206 self
.revision_writer
.start(self
._mirror
, self
.f
)
208 def _create_commit_mark(self
, lod
, revnum
):
209 mark
= self
._mark
_generator
.gen_id()
210 self
._set
_lod
_mark
(lod
, revnum
, mark
)
213 def _set_lod_mark(self
, lod
, revnum
, mark
):
214 """Record MARK as the status of LOD for REVNUM.
216 If there is already an entry for REVNUM, overwrite it. If not,
217 append a new entry to the self._marks list for LOD."""
219 assert revnum
>= self
._youngest
220 entry
= (revnum
, mark
)
222 modifications
= self
._marks
[lod
]
224 # This LOD hasn't appeared before; create a new list and add the
226 self
._marks
[lod
] = [entry
]
228 # A record exists, so it necessarily has at least one element:
229 if modifications
[-1][0] == revnum
:
230 modifications
[-1] = entry
232 modifications
.append(entry
)
233 self
._youngest
= revnum
235 def _get_author(self
, svn_commit
):
236 """Return the author to be used for SVN_COMMIT.
238 Return the author as a UTF-8 string in the form needed by git
239 fast-import; that is, 'name <email>'."""
241 cvs_author
= svn_commit
.get_author()
242 return self
._map
_author
(cvs_author
)
244 def _map_author(self
, cvs_author
):
245 return self
.author_transforms
.get(cvs_author
, "%s <>" % (cvs_author
,))
248 def _get_log_msg(svn_commit
):
249 return svn_commit
.get_log_msg()
def process_initial_project_commit(self, svn_commit):
  """Process the initial project commit SVN_COMMIT.

  Nothing is written to the output file.  A commit numbered
  SVN_COMMIT.revnum is started in self._mirror and then ended
  immediately."""

  mirror = self._mirror
  mirror.start_commit(svn_commit.revnum)
  mirror.end_commit()
255 def process_primary_commit(self
, svn_commit
):
256 author
= self
._get
_author
(svn_commit
)
257 log_msg
= self
._get
_log
_msg
(svn_commit
)
260 for cvs_rev
in svn_commit
.get_cvs_items():
261 lods
.add(cvs_rev
.lod
)
263 raise InternalError('Commit affects %d LODs' % (len(lods
),))
266 self
._mirror
.start_commit(svn_commit
.revnum
)
267 if isinstance(lod
, Trunk
):
268 # FIXME: is this correct?:
269 self
.f
.write('commit refs/heads/master\n')
271 self
.f
.write('commit refs/heads/%s\n' % (lod
.name
,))
272 mark
= self
._create
_commit
_mark
(lod
, svn_commit
.revnum
)
274 'Writing commit r%d on %s (mark :%d)'
275 % (svn_commit
.revnum
, lod
, mark
,)
277 self
.f
.write('mark :%d\n' % (mark
,))
279 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
281 self
.f
.write('data %d\n' % (len(log_msg
),))
282 self
.f
.write('%s\n' % (log_msg
,))
283 for cvs_rev
in svn_commit
.get_cvs_items():
284 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=False)
287 self
._mirror
.end_commit()
289 def process_post_commit(self
, svn_commit
):
290 author
= self
._get
_author
(svn_commit
)
291 log_msg
= self
._get
_log
_msg
(svn_commit
)
294 for cvs_rev
in svn_commit
.cvs_revs
:
295 source_lods
.add(cvs_rev
.lod
)
296 if len(source_lods
) != 1:
297 raise InternalError('Commit is from %d LODs' % (len(source_lods
),))
298 source_lod
= source_lods
.pop()
300 self
._mirror
.start_commit(svn_commit
.revnum
)
301 # FIXME: is this correct?:
302 self
.f
.write('commit refs/heads/master\n')
303 mark
= self
._create
_commit
_mark
(None, svn_commit
.revnum
)
305 'Writing post-commit r%d on %s (mark :%d)'
306 % (svn_commit
.revnum
, lod
, mark
,)
308 self
.f
.write('mark :%d\n' % (mark
,))
310 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
312 self
.f
.write('data %d\n' % (len(log_msg
),))
313 self
.f
.write('%s\n' % (log_msg
,))
316 % (self
._get
_source
_mark
(source_lod
, svn_commit
.revnum
),)
318 for cvs_rev
in svn_commit
.cvs_revs
:
319 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=True)
322 self
._mirror
.end_commit()
324 def _get_source_mark(self
, source_lod
, revnum
):
325 """Return the mark active on SOURCE_LOD at the end of REVNUM."""
327 modifications
= self
._marks
[source_lod
]
328 i
= bisect
.bisect_left(modifications
, (revnum
+ 1,)) - 1
329 (revnum
, mark
) = modifications
[i
]
332 def describe_lod_to_user(self
, lod
):
333 """This needs to make sense to users of the fastimported result."""
334 if isinstance(lod
, Trunk
):
339 def _describe_commit(self
, svn_commit
, lod
):
340 author
= self
._map
_author
(svn_commit
.get_author())
341 if author
.endswith(" <>"):
343 date
= time
.strftime(
344 "%Y-%m-%d %H:%M:%S UTC", time
.gmtime(svn_commit
.date
)
346 log_msg
= svn_commit
.get_log_msg()
347 if log_msg
.find('\n') != -1:
348 log_msg
= log_msg
[:log_msg
.index('\n')]
349 return "%s %s %s '%s'" % (
350 self
.describe_lod_to_user(lod
), date
, author
, log_msg
,)
352 def _process_symbol_commit(self
, svn_commit
, git_branch
, source_groups
):
353 author
= self
._get
_author
(svn_commit
)
354 log_msg
= self
._get
_log
_msg
(svn_commit
)
356 # There are two distinct cases we need to care for here:
357 # 1. initial creation of a LOD
358 # 2. fixup of an existing LOD to include more files, because the LOD in
359 # CVS was created piecemeal over time, with intervening commits
361 # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
362 # might be technically more correct (though _get_lod_history is currently
363 # underscore-private)
364 is_initial_lod_creation
= svn_commit
.symbol
not in self
._marks
366 # Create the mark, only after the check above
367 mark
= self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
)
369 if is_initial_lod_creation
:
370 # Get the primary parent
371 p_source_revnum
, p_source_lod
, p_cvs_symbols
= source_groups
[0]
373 p_source_node
= self
._mirror
.get_old_lod_directory(
374 p_source_lod
, p_source_revnum
377 raise InternalError('Source %r does not exist' % (p_source_lod
,))
378 cvs_files_to_delete
= set(self
._get
_all
_files
(p_source_node
))
380 for (source_revnum
, source_lod
, cvs_symbols
,) in source_groups
:
381 for cvs_symbol
in cvs_symbols
:
382 cvs_files_to_delete
.discard(cvs_symbol
.cvs_file
)
384 # Write a trailer to the log message which describes the cherrypicks that
385 # make up this symbol creation.
387 if is_initial_lod_creation
:
388 log_msg
+= "\nSprout from %s" % (
389 self
._describe
_commit
(
390 Ctx()._persistence
_manager
.get_svn_commit(p_source_revnum
),
394 for (source_revnum
, source_lod
, cvs_symbols
,) \
395 in source_groups
[(is_initial_lod_creation
and 1 or 0):]:
396 log_msg
+= "\nCherrypick from %s:" % (
397 self
._describe
_commit
(
398 Ctx()._persistence
_manager
.get_svn_commit(source_revnum
),
402 for cvs_path
in sorted(
403 cvs_symbol
.cvs_file
.cvs_path
for cvs_symbol
in cvs_symbols
405 log_msg
+= "\n %s" % (cvs_path
,)
406 if is_initial_lod_creation
:
407 if cvs_files_to_delete
:
408 log_msg
+= "\nDelete:"
409 for cvs_path
in sorted(
410 cvs_file
.cvs_path
for cvs_file
in cvs_files_to_delete
412 log_msg
+= "\n %s" % (cvs_path
,)
414 self
.f
.write('commit %s\n' % (git_branch
,))
415 self
.f
.write('mark :%d\n' % (mark
,))
416 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
417 self
.f
.write('data %d\n' % (len(log_msg
),))
418 self
.f
.write('%s\n' % (log_msg
,))
420 # Only record actual DVCS ancestry for the primary sprout parent,
421 # all the rest are effectively cherrypicks.
422 if is_initial_lod_creation
:
425 % (self
._get
_source
_mark
(p_source_lod
, p_source_revnum
),)
428 for (source_revnum
, source_lod
, cvs_symbols
,) in source_groups
:
429 for cvs_symbol
in cvs_symbols
:
430 self
.revision_writer
.branch_file(cvs_symbol
)
432 if is_initial_lod_creation
:
433 for cvs_file
in cvs_files_to_delete
:
434 self
.f
.write('D %s\n' % (cvs_file
.cvs_path
,))
439 def process_branch_commit(self
, svn_commit
):
440 self
._mirror
.start_commit(svn_commit
.revnum
)
442 source_groups
= self
._get
_source
_groups
(svn_commit
)
443 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
444 (source_revnum
, source_lod
, cvs_symbols
) = source_groups
[0]
446 '%s will be created via a simple copy from %s:r%d'
447 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
449 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
450 self
._set
_symbol
(svn_commit
.symbol
, mark
)
451 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
452 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
455 '%s will be created via fixup commit(s)' % (svn_commit
.symbol
,)
457 self
._process
_symbol
_commit
(
458 svn_commit
, 'refs/heads/%s' % (svn_commit
.symbol
.name
,),
462 self
._mirror
.end_commit()
464 def _set_symbol(self
, symbol
, mark
):
465 if isinstance(symbol
, Branch
):
467 elif isinstance(symbol
, Tag
):
470 raise InternalError()
471 self
.f
.write('reset refs/%s/%s\n' % (category
, symbol
.name
,))
472 self
.f
.write('from :%d\n' % (mark
,))
def get_tag_fixup_branch_name(self, svn_commit):
  """Return the branch name to use for the "tag fixup branches".

  The same name is returned regardless of SVN_COMMIT.

  The git-fast-import documentation suggests using 'TAG_FIXUP'
  (outside of the refs/heads namespace), but this is currently
  broken.  Use a name containing '.', which is not allowed in CVS
  symbols, to avoid conflicts (though of course a conflict could
  still result if the user requests symbol transformations)."""

  fixup_ref = 'refs/heads/TAG.FIXUP'
  return fixup_ref
483 def process_tag_commit(self
, svn_commit
):
484 # FIXME: For now we create a fixup branch with the same name as
485 # the tag, then the tag. We never delete the fixup branch.
486 self
._mirror
.start_commit(svn_commit
.revnum
)
488 source_groups
= self
._get
_source
_groups
(svn_commit
)
489 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
490 (source_revnum
, source_lod
, cvs_symbols
) = source_groups
[0]
492 '%s will be created via a simple copy from %s:r%d'
493 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
495 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
496 self
._set
_symbol
(svn_commit
.symbol
, mark
)
497 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
498 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
501 '%s will be created via a fixup branch' % (svn_commit
.symbol
,)
504 fixup_branch_name
= self
.get_tag_fixup_branch_name(svn_commit
)
506 # Create the fixup branch (which might involve making more than
508 mark
= self
._process
_symbol
_commit
(
509 svn_commit
, fixup_branch_name
, source_groups
512 # Store the mark of the last commit to the fixup branch as the
514 self
._set
_symbol
(svn_commit
.symbol
, mark
)
515 self
.f
.write('reset %s\n' % (fixup_branch_name
,))
518 if self
.tie_tag_fixup_branches
:
519 source_lod
= source_groups
[0][1]
520 source_lod_git_branch
= \
521 'refs/heads/%s' % (getattr(source_lod
, 'name', 'master'),)
523 mark2
= self
._create
_commit
_mark
(source_lod
, svn_commit
.revnum
)
524 author
= self
._map
_author
(Ctx().username
)
525 log_msg
= self
._get
_log
_msg
_for
_ancestry
_tie
(svn_commit
)
527 self
.f
.write('commit %s\n' % (source_lod_git_branch
,))
528 self
.f
.write('mark :%d\n' % (mark2
,))
529 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
530 self
.f
.write('data %d\n' % (len(log_msg
),))
531 self
.f
.write('%s\n' % (log_msg
,))
540 self
._mirror
.end_commit()
542 def _get_log_msg_for_ancestry_tie(self
, svn_commit
):
543 return Ctx().text_wrapper
.fill(
544 Ctx().tie_tag_ancestry_message
% {
545 'symbol_name' : svn_commit
.symbol
.name
,
550 DVCSOutputOption
.cleanup(self
)
551 self
.revision_writer
.finish()