1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
28 from cvs2svn_lib
.common
import InternalError
29 from cvs2svn_lib
.log
import logger
30 from cvs2svn_lib
.context
import Ctx
31 from cvs2svn_lib
.symbol
import Trunk
32 from cvs2svn_lib
.symbol
import Branch
33 from cvs2svn_lib
.symbol
import Tag
34 from cvs2svn_lib
.cvs_item
import CVSSymbol
35 from cvs2svn_lib
.dvcs_common
import DVCSOutputOption
36 from cvs2svn_lib
.dvcs_common
import MirrorUpdater
37 from cvs2svn_lib
.key_generator
import KeyGenerator
40 class GitRevisionWriter(MirrorUpdater
):
42 def start(self
, mirror
, f
):
43 super(GitRevisionWriter
, self
).start(mirror
)
46 def _modify_file(self
, cvs_item
, post_commit
):
47 raise NotImplementedError()
def add_file(self, cvs_rev, post_commit):
    """Handle the addition of the file revision CVS_REV.

    The parent class's add_file() runs first; the revision is then
    passed, together with POST_COMMIT, to self._modify_file()."""
    parent = super(GitRevisionWriter, self)
    parent.add_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)
def modify_file(self, cvs_rev, post_commit):
    """Handle a modification of a file by the revision CVS_REV.

    Delegates to the parent class's modify_file() and afterwards
    forwards CVS_REV and POST_COMMIT to self._modify_file()."""
    parent = super(GitRevisionWriter, self)
    parent.modify_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)
def delete_file(self, cvs_rev, post_commit):
    """Handle the deletion of the file that CVS_REV belongs to.

    After the parent class's delete_file() has run, a 'D <path>' line
    naming the file's cvs_path is written to self.f."""
    super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit)
    deleted_path = cvs_rev.cvs_file.cvs_path
    self.f.write('D %s\n' % (deleted_path,))
def branch_file(self, cvs_symbol):
    """Handle the copy of a file onto a symbol, described by CVS_SYMBOL.

    The parent class's branch_file() runs first; CVS_SYMBOL is then
    handed to self._modify_file() with post_commit set to False."""
    parent = super(GitRevisionWriter, self)
    parent.branch_file(cvs_symbol)
    self._modify_file(cvs_symbol, post_commit=False)
66 super(GitRevisionWriter
, self
).finish()
70 class GitRevisionMarkWriter(GitRevisionWriter
):
71 def _modify_file(self
, cvs_item
, post_commit
):
72 if cvs_item
.cvs_file
.executable
:
79 % (mode
, cvs_item
.revision_reader_token
,
80 cvs_item
.cvs_file
.cvs_path
,)
84 class GitRevisionInlineWriter(GitRevisionWriter
):
85 def __init__(self
, revision_reader
):
86 self
.revision_reader
= revision_reader
def register_artifacts(self, which_pass):
    """Register the artifacts needed during WHICH_PASS.

    Registration is done first through GitRevisionWriter and then
    through the wrapped revision reader."""
    GitRevisionWriter.register_artifacts(self, which_pass)
    reader = self.revision_reader
    reader.register_artifacts(which_pass)
def start(self, mirror, f):
    """Start writing: initialize the base writer, then the reader.

    MIRROR and F are passed unchanged to GitRevisionWriter.start();
    afterwards the wrapped revision reader's start() is called."""
    GitRevisionWriter.start(self, mirror, f)
    reader = self.revision_reader
    reader.start()
96 def _modify_file(self
, cvs_item
, post_commit
):
97 if cvs_item
.cvs_file
.executable
:
104 % (mode
, cvs_item
.cvs_file
.cvs_path
,)
107 if isinstance(cvs_item
, CVSSymbol
):
108 cvs_rev
= cvs_item
.get_cvs_revision_source(Ctx()._cvs
_items
_db
)
112 # FIXME: We have to decide what to do about keyword substitution
113 # and eol_style here:
114 fulltext
= self
.revision_reader
.get_content(cvs_rev
)
116 self
.f
.write('data %d\n' % (len(fulltext
),))
117 self
.f
.write(fulltext
)
121 GitRevisionWriter
.finish(self
)
122 self
.revision_reader
.finish()
125 class GitOutputOption(DVCSOutputOption
):
126 """An OutputOption that outputs to a git-fast-import formatted file.
130 dump_filename -- (string) the name of the file to which the
131 git-fast-import commands for defining revisions will be
134 author_transforms -- a map from CVS author names to git full name
135 and email address. See
136 DVCSOutputOption.normalize_author_transforms() for information
137 about the form of this parameter.
143 # The first mark number used for git-fast-import commit marks. This
144 # value needs to be large to avoid conflicts with blob marks.
145 _first_commit_mark
= 1000000000
148 self
, dump_filename
, revision_writer
,
149 author_transforms
=None,
150 tie_tag_fixup_branches
=False,
154 DUMP_FILENAME is the name of the file to which the git-fast-import
155 commands for defining revisions should be written. (Please note
156 that depending on the style of revision writer, the actual file
157 contents might not be written to this file.)
159 REVISION_WRITER is a GitRevisionWriter that is used to output
160 either the content of revisions or a mark that was previously used
163 AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
164 CVS author names to git full name and email address. All of the
165 contents should either be Unicode strings or 8-bit strings encoded
168 TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
169 fixup branch, it should be pseudo-merged (ancestry linked but no
170 content changes) back into its source branch, to dispose of the
174 DVCSOutputOption
.__init
__(self
)
175 self
.dump_filename
= dump_filename
176 self
.revision_writer
= revision_writer
178 self
.author_transforms
= self
.normalize_author_transforms(
182 self
.tie_tag_fixup_branches
= tie_tag_fixup_branches
184 self
._mark
_generator
= KeyGenerator(GitOutputOption
._first
_commit
_mark
)
def register_artifacts(self, which_pass):
    """Register the artifacts needed during WHICH_PASS.

    Registration is done first through DVCSOutputOption and then
    through self.revision_writer."""
    DVCSOutputOption.register_artifacts(self, which_pass)
    writer = self.revision_writer
    writer.register_artifacts(which_pass)
190 def check_symbols(self
, symbol_map
):
191 # FIXME: What constraints does git impose on symbols?
194 def setup(self
, svn_rev_count
):
195 DVCSOutputOption
.setup(self
, svn_rev_count
)
196 self
.f
= open(self
.dump_filename
, 'wb')
198 # The youngest revnum that has been committed so far:
201 # A map {lod : [(revnum, mark)]} giving each of the revision
202 # numbers in which there was a commit to lod, and the mark active
203 # at the end of the revnum.
206 self
.revision_writer
.start(self
._mirror
, self
.f
)
208 def _create_commit_mark(self
, lod
, revnum
):
209 mark
= self
._mark
_generator
.gen_id()
210 self
._set
_lod
_mark
(lod
, revnum
, mark
)
213 def _set_lod_mark(self
, lod
, revnum
, mark
):
214 """Record MARK as the status of LOD for REVNUM.
216 If there is already an entry for REVNUM, overwrite it. If not,
217 append a new entry to the self._marks list for LOD."""
219 assert revnum
>= self
._youngest
220 entry
= (revnum
, mark
)
222 modifications
= self
._marks
[lod
]
224 # This LOD hasn't appeared before; create a new list and add the
226 self
._marks
[lod
] = [entry
]
228 # A record exists, so it necessarily has at least one element:
229 if modifications
[-1][0] == revnum
:
230 modifications
[-1] = entry
232 modifications
.append(entry
)
233 self
._youngest
= revnum
235 def _get_author(self
, svn_commit
):
236 """Return the author to be used for SVN_COMMIT.
238 Return the author as a UTF-8 string in the form needed by git
239 fast-import; that is, 'name <email>'."""
241 cvs_author
= svn_commit
.get_author()
242 return self
._map
_author
(cvs_author
)
244 def _map_author(self
, cvs_author
):
245 return self
.author_transforms
.get(cvs_author
, "%s <>" % (cvs_author
,))
248 def _get_log_msg(svn_commit
):
249 return svn_commit
.get_log_msg()
def process_initial_project_commit(self, svn_commit):
    """Record SVN_COMMIT in the mirror as a commit with no changes.

    The commit is opened for SVN_COMMIT's revnum and closed again
    immediately; this method itself writes nothing to the dump file."""
    revnum = svn_commit.revnum
    self._mirror.start_commit(revnum)
    self._mirror.end_commit()
255 def process_primary_commit(self
, svn_commit
):
256 author
= self
._get
_author
(svn_commit
)
257 log_msg
= self
._get
_log
_msg
(svn_commit
)
260 for cvs_rev
in svn_commit
.get_cvs_items():
261 lods
.add(cvs_rev
.lod
)
263 raise InternalError('Commit affects %d LODs' % (len(lods
),))
266 self
._mirror
.start_commit(svn_commit
.revnum
)
267 if isinstance(lod
, Trunk
):
268 # FIXME: is this correct?:
269 self
.f
.write('commit refs/heads/master\n')
271 self
.f
.write('commit refs/heads/%s\n' % (lod
.name
,))
274 % (self
._create
_commit
_mark
(lod
, svn_commit
.revnum
),)
277 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
279 self
.f
.write('data %d\n' % (len(log_msg
),))
280 self
.f
.write('%s\n' % (log_msg
,))
281 for cvs_rev
in svn_commit
.get_cvs_items():
282 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=False)
285 self
._mirror
.end_commit()
287 def process_post_commit(self
, svn_commit
):
288 author
= self
._get
_author
(svn_commit
)
289 log_msg
= self
._get
_log
_msg
(svn_commit
)
292 for cvs_rev
in svn_commit
.cvs_revs
:
293 source_lods
.add(cvs_rev
.lod
)
294 if len(source_lods
) != 1:
295 raise InternalError('Commit is from %d LODs' % (len(source_lods
),))
296 source_lod
= source_lods
.pop()
298 self
._mirror
.start_commit(svn_commit
.revnum
)
299 # FIXME: is this correct?:
300 self
.f
.write('commit refs/heads/master\n')
303 % (self
._create
_commit
_mark
(None, svn_commit
.revnum
),)
306 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
308 self
.f
.write('data %d\n' % (len(log_msg
),))
309 self
.f
.write('%s\n' % (log_msg
,))
312 % (self
._get
_source
_mark
(source_lod
, svn_commit
.revnum
),)
314 for cvs_rev
in svn_commit
.cvs_revs
:
315 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=True)
318 self
._mirror
.end_commit()
320 def _get_source_mark(self
, source_lod
, revnum
):
321 """Return the mark active on SOURCE_LOD at the end of REVNUM."""
323 modifications
= self
._marks
[source_lod
]
324 i
= bisect
.bisect_left(modifications
, (revnum
+ 1,)) - 1
325 (revnum
, mark
) = modifications
[i
]
328 def describe_lod_to_user(self
, lod
):
329 """This needs to make sense to users of the fastimported result."""
330 if isinstance(lod
, Trunk
):
335 def _describe_commit(self
, svn_commit
, lod
):
336 author
= self
._map
_author
(svn_commit
.get_author())
337 if author
.endswith(" <>"):
339 date
= time
.strftime(
340 "%Y-%m-%d %H:%M:%S UTC", time
.gmtime(svn_commit
.date
)
342 log_msg
= svn_commit
.get_log_msg()
343 if log_msg
.find('\n') != -1:
344 log_msg
= log_msg
[:log_msg
.index('\n')]
345 return "%s %s %s '%s'" % (
346 self
.describe_lod_to_user(lod
), date
, author
, log_msg
,)
348 def _process_symbol_commit(self
, svn_commit
, git_branch
, source_groups
):
349 author
= self
._get
_author
(svn_commit
)
350 log_msg
= self
._get
_log
_msg
(svn_commit
)
352 # There are two distinct cases we need to care for here:
353 # 1. initial creation of a LOD
354 # 2. fixup of an existing LOD to include more files, because the LOD in
355 # CVS was created piecemeal over time, with intervening commits
357 # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
358 # might be technically more correct (though _get_lod_history is currently
359 # underscore-private)
360 is_initial_lod_creation
= svn_commit
.symbol
not in self
._marks
362 # Create the mark, only after the check above
363 mark
= self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
)
365 if is_initial_lod_creation
:
366 # Get the primary parent
367 p_source_revnum
, p_source_lod
, p_cvs_symbols
= source_groups
[0]
369 p_source_node
= self
._mirror
.get_old_lod_directory(
370 p_source_lod
, p_source_revnum
373 raise InternalError('Source %r does not exist' % (p_source_lod
,))
374 cvs_files_to_delete
= set(self
._get
_all
_files
(p_source_node
))
376 for (source_revnum
, source_lod
, cvs_symbols
,) in source_groups
:
377 for cvs_symbol
in cvs_symbols
:
378 cvs_files_to_delete
.discard(cvs_symbol
.cvs_file
)
380 # Write a trailer to the log message which describes the cherrypicks that
381 # make up this symbol creation.
383 if is_initial_lod_creation
:
384 log_msg
+= "\nSprout from %s" % (
385 self
._describe
_commit
(
386 Ctx()._persistence
_manager
.get_svn_commit(p_source_revnum
),
390 for (source_revnum
, source_lod
, cvs_symbols
,) \
391 in source_groups
[(is_initial_lod_creation
and 1 or 0):]:
392 log_msg
+= "\nCherrypick from %s:" % (
393 self
._describe
_commit
(
394 Ctx()._persistence
_manager
.get_svn_commit(source_revnum
),
398 for cvs_path
in sorted(
399 cvs_symbol
.cvs_file
.cvs_path
for cvs_symbol
in cvs_symbols
401 log_msg
+= "\n %s" % (cvs_path
,)
402 if is_initial_lod_creation
:
403 if cvs_files_to_delete
:
404 log_msg
+= "\nDelete:"
405 for cvs_path
in sorted(
406 cvs_file
.cvs_path
for cvs_file
in cvs_files_to_delete
408 log_msg
+= "\n %s" % (cvs_path
,)
410 self
.f
.write('commit %s\n' % (git_branch
,))
411 self
.f
.write('mark :%d\n' % (mark
,))
412 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
413 self
.f
.write('data %d\n' % (len(log_msg
),))
414 self
.f
.write('%s\n' % (log_msg
,))
416 # Only record actual DVCS ancestry for the primary sprout parent,
417 # all the rest are effectively cherrypicks.
418 if is_initial_lod_creation
:
421 % (self
._get
_source
_mark
(p_source_lod
, p_source_revnum
),)
424 for (source_revnum
, source_lod
, cvs_symbols
,) in source_groups
:
425 for cvs_symbol
in cvs_symbols
:
426 self
.revision_writer
.branch_file(cvs_symbol
)
428 if is_initial_lod_creation
:
429 for cvs_file
in cvs_files_to_delete
:
430 self
.f
.write('D %s\n' % (cvs_file
.cvs_path
,))
435 def process_branch_commit(self
, svn_commit
):
436 self
._mirror
.start_commit(svn_commit
.revnum
)
438 source_groups
= self
._get
_source
_groups
(svn_commit
)
439 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
440 (source_revnum
, source_lod
, cvs_symbols
) = source_groups
[0]
442 '%s will be created via a simple copy from %s:r%d'
443 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
445 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
446 self
._set
_symbol
(svn_commit
.symbol
, mark
)
447 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
448 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
451 '%s will be created via fixup commit(s)' % (svn_commit
.symbol
,)
453 self
._process
_symbol
_commit
(
454 svn_commit
, 'refs/heads/%s' % (svn_commit
.symbol
.name
,),
458 self
._mirror
.end_commit()
460 def _set_symbol(self
, symbol
, mark
):
461 if isinstance(symbol
, Branch
):
463 elif isinstance(symbol
, Tag
):
466 raise InternalError()
467 self
.f
.write('reset refs/%s/%s\n' % (category
, symbol
.name
,))
468 self
.f
.write('from :%d\n' % (mark
,))
def get_tag_fixup_branch_name(self, svn_commit):
    """Return the branch name to use for the "tag fixup branches".

    The same name is returned for every SVN_COMMIT.  The
    git-fast-import documentation suggests using 'TAG_FIXUP' (outside
    of the refs/heads namespace), but this is currently broken.  So
    use a name containing '.', which is not allowed in CVS symbols,
    to avoid conflicts (though of course a conflict could still
    result if the user requests symbol transformations)."""
    return 'refs/heads/TAG.FIXUP'
479 def process_tag_commit(self
, svn_commit
):
480 # FIXME: For now we create a fixup branch with the same name as
481 # the tag, then the tag. We never delete the fixup branch.
482 self
._mirror
.start_commit(svn_commit
.revnum
)
484 source_groups
= self
._get
_source
_groups
(svn_commit
)
485 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
486 (source_revnum
, source_lod
, cvs_symbols
) = source_groups
[0]
488 '%s will be created via a simple copy from %s:r%d'
489 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
491 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
492 self
._set
_symbol
(svn_commit
.symbol
, mark
)
493 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
494 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
497 '%s will be created via a fixup branch' % (svn_commit
.symbol
,)
500 fixup_branch_name
= self
.get_tag_fixup_branch_name(svn_commit
)
502 # Create the fixup branch (which might involve making more than
504 mark
= self
._process
_symbol
_commit
(
505 svn_commit
, fixup_branch_name
, source_groups
508 # Store the mark of the last commit to the fixup branch as the
510 self
._set
_symbol
(svn_commit
.symbol
, mark
)
511 self
.f
.write('reset %s\n' % (fixup_branch_name
,))
514 if self
.tie_tag_fixup_branches
:
515 source_lod
= source_groups
[0][1]
516 source_lod_git_branch
= \
517 'refs/heads/%s' % (getattr(source_lod
, 'name', 'master'),)
519 mark2
= self
._create
_commit
_mark
(source_lod
, svn_commit
.revnum
)
520 author
= self
._map
_author
(Ctx().username
)
521 log_msg
= self
._get
_log
_msg
_for
_ancestry
_tie
(svn_commit
)
523 self
.f
.write('commit %s\n' % (source_lod_git_branch
,))
524 self
.f
.write('mark :%d\n' % (mark2
,))
525 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
526 self
.f
.write('data %d\n' % (len(log_msg
),))
527 self
.f
.write('%s\n' % (log_msg
,))
536 self
._mirror
.end_commit()
538 def _get_log_msg_for_ancestry_tie(self
, svn_commit
):
539 return Ctx().text_wrapper
.fill(
540 Ctx().tie_tag_ancestry_message
% {
541 'symbol_name' : svn_commit
.symbol
.name
,
546 DVCSOutputOption
.cleanup(self
)
547 self
.revision_writer
.finish()