1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
30 from cvs2svn_lib
import config
31 from cvs2svn_lib
.common
import InternalError
32 from cvs2svn_lib
.log
import logger
33 from cvs2svn_lib
.context
import Ctx
34 from cvs2svn_lib
.symbol
import Trunk
35 from cvs2svn_lib
.symbol
import Branch
36 from cvs2svn_lib
.symbol
import Tag
37 from cvs2svn_lib
.cvs_item
import CVSSymbol
38 from cvs2svn_lib
.dvcs_common
import DVCSOutputOption
39 from cvs2svn_lib
.dvcs_common
import MirrorUpdater
40 from cvs2svn_lib
.key_generator
import KeyGenerator
41 from cvs2svn_lib
.artifact_manager
import artifact_manager
44 class GitRevisionWriter(MirrorUpdater
):
46 def start(self
, mirror
, f
):
47 MirrorUpdater
.start(self
, mirror
)
50 def _modify_file(self
, cvs_item
, post_commit
):
51 raise NotImplementedError()
def add_file(self, cvs_rev, post_commit):
    """Handle the addition of the file described by CVS_REV.

    The mirror bookkeeping is delegated to MirrorUpdater.add_file();
    afterwards the content is emitted through self._modify_file(),
    exactly as for a modification."""

    # Update the repository mirror first, then write the output.
    MirrorUpdater.add_file(self, cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)
def modify_file(self, cvs_rev, post_commit):
    """Handle a change to the file described by CVS_REV.

    The mirror bookkeeping is delegated to
    MirrorUpdater.modify_file(); afterwards the content is emitted
    through self._modify_file()."""

    # Update the repository mirror first, then write the output.
    MirrorUpdater.modify_file(self, cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)
def delete_file(self, cvs_rev, post_commit):
    """Handle the deletion of the file described by CVS_REV.

    The mirror bookkeeping is delegated to
    MirrorUpdater.delete_file(); afterwards a 'D <path>' line naming
    the file's cvs_path is written to self.f."""

    MirrorUpdater.delete_file(self, cvs_rev, post_commit)
    deleted_path = cvs_rev.cvs_file.cvs_path
    self.f.write('D %s\n' % (deleted_path,))
def branch_file(self, cvs_symbol):
    """Handle the copy of a file onto the symbol given by CVS_SYMBOL.

    The mirror bookkeeping is delegated to
    MirrorUpdater.branch_file(); afterwards the content is emitted
    through self._modify_file() with post_commit=False."""

    MirrorUpdater.branch_file(self, cvs_symbol)
    # Symbol fills are never post-commits.
    self._modify_file(cvs_symbol, post_commit=False)
70 MirrorUpdater
.finish(self
)
74 class GitRevisionMarkWriter(GitRevisionWriter
):
75 def register_artifacts(self
, which_pass
):
76 GitRevisionWriter
.register_artifacts(self
, which_pass
)
77 if Ctx().revision_collector
.blob_filename
is None:
78 artifact_manager
.register_temp_file_needed(
79 config
.GIT_BLOB_DATAFILE
, which_pass
,
82 def start(self
, mirror
, f
):
83 GitRevisionWriter
.start(self
, mirror
, f
)
84 if Ctx().revision_collector
.blob_filename
is None:
85 # The revision collector wrote the blobs to a temporary file;
87 logger
.normal('Copying blob data to output')
89 artifact_manager
.get_temp_file(config
.GIT_BLOB_DATAFILE
), 'rb',
91 shutil
.copyfileobj(blobf
, f
)
94 def _modify_file(self
, cvs_item
, post_commit
):
95 if cvs_item
.cvs_file
.executable
:
102 % (mode
, cvs_item
.revision_reader_token
,
103 cvs_item
.cvs_file
.cvs_path
,)
107 class GitRevisionInlineWriter(GitRevisionWriter
):
108 def __init__(self
, revision_reader
):
109 self
.revision_reader
= revision_reader
def register_artifacts(self, which_pass):
    """Register the artifacts needed during WHICH_PASS.

    First the registration inherited via
    GitRevisionWriter.register_artifacts() is performed, then the
    revision reader is asked to register its own artifacts for the
    same pass."""

    GitRevisionWriter.register_artifacts(self, which_pass)
    reader = self.revision_reader
    reader.register_artifacts(which_pass)
def start(self, mirror, f):
    """Prepare this writer for output and start the revision reader.

    MIRROR and F are handed unchanged to GitRevisionWriter.start();
    once that has returned, self.revision_reader.start() is
    called."""

    GitRevisionWriter.start(self, mirror, f)
    reader = self.revision_reader
    reader.start()
119 def _modify_file(self
, cvs_item
, post_commit
):
120 if cvs_item
.cvs_file
.executable
:
127 % (mode
, cvs_item
.cvs_file
.cvs_path
,)
130 if isinstance(cvs_item
, CVSSymbol
):
131 cvs_rev
= cvs_item
.get_cvs_revision_source(Ctx()._cvs
_items
_db
)
135 # FIXME: We have to decide what to do about keyword substitution
136 # and eol_style here:
137 fulltext
= self
.revision_reader
.get_content(cvs_rev
)
139 self
.f
.write('data %d\n' % (len(fulltext
),))
140 self
.f
.write(fulltext
)
144 GitRevisionWriter
.finish(self
)
145 self
.revision_reader
.finish()
148 class GitOutputOption(DVCSOutputOption
):
149 """An OutputOption that outputs to a git-fast-import formatted file.
153 dump_filename -- (string or None) the name of the file to which
154 the git-fast-import commands for defining revisions will be
155 written. If None, the data will be written to stdout.
157 author_transforms -- a map from CVS author names to git full name
158 and email address. See
159 DVCSOutputOption.normalize_author_transforms() for information
160 about the form of this parameter.
166 # The first mark number used for git-fast-import commit marks. This
167 # value needs to be large to avoid conflicts with blob marks.
168 _first_commit_mark
= 1000000000
171 self
, revision_writer
,
173 author_transforms
=None,
174 tie_tag_fixup_branches
=False,
178 REVISION_WRITER is a GitRevisionWriter that is used to output
179 either the content of revisions or a mark that was previously used
182 DUMP_FILENAME is the name of the file to which the git-fast-import
183 commands for defining revisions should be written. (Please note
184 that depending on the style of revision writer, the actual file
185 contents might not be written to this file.) If it is None, then
186 the output is written to stdout.
188 AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
189 CVS author names to git full name and email address. All of the
190 contents should either be Unicode strings or 8-bit strings encoded
193 TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
fixup branch, it should be pseudo-merged (ancestry linked but no
195 content changes) back into its source branch, to dispose of the
199 DVCSOutputOption
.__init
__(self
)
200 self
.dump_filename
= dump_filename
201 self
.revision_writer
= revision_writer
203 self
.author_transforms
= self
.normalize_author_transforms(
207 self
.tie_tag_fixup_branches
= tie_tag_fixup_branches
209 self
._mark
_generator
= KeyGenerator(GitOutputOption
._first
_commit
_mark
)
def register_artifacts(self, which_pass):
    """Register the artifacts needed during WHICH_PASS.

    The registration of DVCSOutputOption.register_artifacts() is
    performed first; then the configured revision writer is asked to
    register its own artifacts for the same pass."""

    DVCSOutputOption.register_artifacts(self, which_pass)
    writer = self.revision_writer
    writer.register_artifacts(which_pass)
215 def check_symbols(self
, symbol_map
):
216 # FIXME: What constraints does git impose on symbols?
219 def setup(self
, svn_rev_count
):
220 DVCSOutputOption
.setup(self
, svn_rev_count
)
221 if self
.dump_filename
is None:
224 self
.f
= open(self
.dump_filename
, 'wb')
226 # The youngest revnum that has been committed so far:
229 # A map {lod : [(revnum, mark)]} giving each of the revision
230 # numbers in which there was a commit to lod, and the mark active
231 # at the end of the revnum.
234 self
.revision_writer
.start(self
._mirror
, self
.f
)
236 def _create_commit_mark(self
, lod
, revnum
):
237 mark
= self
._mark
_generator
.gen_id()
238 self
._set
_lod
_mark
(lod
, revnum
, mark
)
241 def _set_lod_mark(self
, lod
, revnum
, mark
):
242 """Record MARK as the status of LOD for REVNUM.
244 If there is already an entry for REVNUM, overwrite it. If not,
245 append a new entry to the self._marks list for LOD."""
247 assert revnum
>= self
._youngest
248 entry
= (revnum
, mark
)
250 modifications
= self
._marks
[lod
]
252 # This LOD hasn't appeared before; create a new list and add the
254 self
._marks
[lod
] = [entry
]
256 # A record exists, so it necessarily has at least one element:
257 if modifications
[-1][0] == revnum
:
258 modifications
[-1] = entry
260 modifications
.append(entry
)
261 self
._youngest
= revnum
263 def _get_author(self
, svn_commit
):
264 """Return the author to be used for SVN_COMMIT.
266 Return the author as a UTF-8 string in the form needed by git
267 fast-import; that is, 'name <email>'."""
269 cvs_author
= svn_commit
.get_author()
270 return self
._map
_author
(cvs_author
)
272 def _map_author(self
, cvs_author
):
273 return self
.author_transforms
.get(cvs_author
, "%s <>" % (cvs_author
,))
276 def _get_log_msg(svn_commit
):
277 return svn_commit
.get_log_msg()
def process_initial_project_commit(self, svn_commit):
    """Process the initial project commit SVN_COMMIT.

    Nothing is written to the output here: the method only opens a
    commit for SVN_COMMIT.revnum in the mirror and closes it again
    straight away."""

    mirror = self._mirror
    mirror.start_commit(svn_commit.revnum)
    mirror.end_commit()
283 def process_primary_commit(self
, svn_commit
):
284 author
= self
._get
_author
(svn_commit
)
285 log_msg
= self
._get
_log
_msg
(svn_commit
)
288 for cvs_rev
in svn_commit
.get_cvs_items():
289 lods
.add(cvs_rev
.lod
)
291 raise InternalError('Commit affects %d LODs' % (len(lods
),))
294 self
._mirror
.start_commit(svn_commit
.revnum
)
295 if isinstance(lod
, Trunk
):
296 # FIXME: is this correct?:
297 self
.f
.write('commit refs/heads/master\n')
299 self
.f
.write('commit refs/heads/%s\n' % (lod
.name
,))
300 mark
= self
._create
_commit
_mark
(lod
, svn_commit
.revnum
)
302 'Writing commit r%d on %s (mark :%d)'
303 % (svn_commit
.revnum
, lod
, mark
,)
305 self
.f
.write('mark :%d\n' % (mark
,))
307 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
309 self
.f
.write('data %d\n' % (len(log_msg
),))
310 self
.f
.write('%s\n' % (log_msg
,))
311 for cvs_rev
in svn_commit
.get_cvs_items():
312 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=False)
315 self
._mirror
.end_commit()
317 def process_post_commit(self
, svn_commit
):
318 author
= self
._get
_author
(svn_commit
)
319 log_msg
= self
._get
_log
_msg
(svn_commit
)
322 for cvs_rev
in svn_commit
.cvs_revs
:
323 source_lods
.add(cvs_rev
.lod
)
324 if len(source_lods
) != 1:
325 raise InternalError('Commit is from %d LODs' % (len(source_lods
),))
326 source_lod
= source_lods
.pop()
328 self
._mirror
.start_commit(svn_commit
.revnum
)
329 # FIXME: is this correct?:
330 self
.f
.write('commit refs/heads/master\n')
331 mark
= self
._create
_commit
_mark
(None, svn_commit
.revnum
)
333 'Writing post-commit r%d on Trunk (mark :%d)'
334 % (svn_commit
.revnum
, mark
,)
336 self
.f
.write('mark :%d\n' % (mark
,))
338 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
340 self
.f
.write('data %d\n' % (len(log_msg
),))
341 self
.f
.write('%s\n' % (log_msg
,))
344 % (self
._get
_source
_mark
(source_lod
, svn_commit
.revnum
),)
346 for cvs_rev
in svn_commit
.cvs_revs
:
347 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=True)
350 self
._mirror
.end_commit()
352 def _get_source_mark(self
, source_lod
, revnum
):
353 """Return the mark active on SOURCE_LOD at the end of REVNUM."""
355 modifications
= self
._marks
[source_lod
]
356 i
= bisect
.bisect_left(modifications
, (revnum
+ 1,)) - 1
357 (revnum
, mark
) = modifications
[i
]
360 def describe_lod_to_user(self
, lod
):
361 """This needs to make sense to users of the fastimported result."""
362 if isinstance(lod
, Trunk
):
367 def _describe_commit(self
, svn_commit
, lod
):
368 author
= self
._map
_author
(svn_commit
.get_author())
369 if author
.endswith(" <>"):
371 date
= time
.strftime(
372 "%Y-%m-%d %H:%M:%S UTC", time
.gmtime(svn_commit
.date
)
374 log_msg
= svn_commit
.get_log_msg()
375 if log_msg
.find('\n') != -1:
376 log_msg
= log_msg
[:log_msg
.index('\n')]
377 return "%s %s %s '%s'" % (
378 self
.describe_lod_to_user(lod
), date
, author
, log_msg
,)
380 def _process_symbol_commit(self
, svn_commit
, git_branch
, source_groups
):
381 author
= self
._get
_author
(svn_commit
)
382 log_msg
= self
._get
_log
_msg
(svn_commit
)
384 # There are two distinct cases we need to care for here:
385 # 1. initial creation of a LOD
386 # 2. fixup of an existing LOD to include more files, because the LOD in
387 # CVS was created piecemeal over time, with intervening commits
389 # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
390 # might be technically more correct (though _get_lod_history is currently
391 # underscore-private)
392 is_initial_lod_creation
= svn_commit
.symbol
not in self
._marks
394 # Create the mark, only after the check above
395 mark
= self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
)
397 if is_initial_lod_creation
:
398 # Get the primary parent
399 p_source_revnum
, p_source_lod
, p_cvs_symbols
= source_groups
[0]
401 p_source_node
= self
._mirror
.get_old_lod_directory(
402 p_source_lod
, p_source_revnum
405 raise InternalError('Source %r does not exist' % (p_source_lod
,))
406 cvs_files_to_delete
= set(self
._get
_all
_files
(p_source_node
))
408 for (source_revnum
, source_lod
, cvs_symbols
,) in source_groups
:
409 for cvs_symbol
in cvs_symbols
:
410 cvs_files_to_delete
.discard(cvs_symbol
.cvs_file
)
412 # Write a trailer to the log message which describes the cherrypicks that
413 # make up this symbol creation.
415 if is_initial_lod_creation
:
416 log_msg
+= "\nSprout from %s" % (
417 self
._describe
_commit
(
418 Ctx()._persistence
_manager
.get_svn_commit(p_source_revnum
),
422 for (source_revnum
, source_lod
, cvs_symbols
,) \
423 in source_groups
[(is_initial_lod_creation
and 1 or 0):]:
424 log_msg
+= "\nCherrypick from %s:" % (
425 self
._describe
_commit
(
426 Ctx()._persistence
_manager
.get_svn_commit(source_revnum
),
430 for cvs_path
in sorted(
431 cvs_symbol
.cvs_file
.cvs_path
for cvs_symbol
in cvs_symbols
433 log_msg
+= "\n %s" % (cvs_path
,)
434 if is_initial_lod_creation
:
435 if cvs_files_to_delete
:
436 log_msg
+= "\nDelete:"
437 for cvs_path
in sorted(
438 cvs_file
.cvs_path
for cvs_file
in cvs_files_to_delete
440 log_msg
+= "\n %s" % (cvs_path
,)
442 self
.f
.write('commit %s\n' % (git_branch
,))
443 self
.f
.write('mark :%d\n' % (mark
,))
444 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
445 self
.f
.write('data %d\n' % (len(log_msg
),))
446 self
.f
.write('%s\n' % (log_msg
,))
448 # Only record actual DVCS ancestry for the primary sprout parent,
449 # all the rest are effectively cherrypicks.
450 if is_initial_lod_creation
:
453 % (self
._get
_source
_mark
(p_source_lod
, p_source_revnum
),)
456 for (source_revnum
, source_lod
, cvs_symbols
,) in source_groups
:
457 for cvs_symbol
in cvs_symbols
:
458 self
.revision_writer
.branch_file(cvs_symbol
)
460 if is_initial_lod_creation
:
461 for cvs_file
in cvs_files_to_delete
:
462 self
.f
.write('D %s\n' % (cvs_file
.cvs_path
,))
467 def process_branch_commit(self
, svn_commit
):
468 self
._mirror
.start_commit(svn_commit
.revnum
)
470 source_groups
= self
._get
_source
_groups
(svn_commit
)
471 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
472 (source_revnum
, source_lod
, cvs_symbols
) = source_groups
[0]
474 '%s will be created via a simple copy from %s:r%d'
475 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
477 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
478 self
._set
_symbol
(svn_commit
.symbol
, mark
)
479 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
480 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
483 '%s will be created via fixup commit(s)' % (svn_commit
.symbol
,)
485 self
._process
_symbol
_commit
(
486 svn_commit
, 'refs/heads/%s' % (svn_commit
.symbol
.name
,),
490 self
._mirror
.end_commit()
492 def _set_symbol(self
, symbol
, mark
):
493 if isinstance(symbol
, Branch
):
495 elif isinstance(symbol
, Tag
):
498 raise InternalError()
499 self
.f
.write('reset refs/%s/%s\n' % (category
, symbol
.name
,))
500 self
.f
.write('from :%d\n' % (mark
,))
def get_tag_fixup_branch_name(self, svn_commit):
    """Return the branch name to use for the "tag fixup branches".

    The git-fast-import documentation suggests using 'TAG_FIXUP'
    (outside of the refs/heads namespace), but this is currently
    broken.  Use a name containing '.', which is not allowed in CVS
    symbols, to avoid conflicts (though of course a conflict could
    still result if the user requests symbol transformations).

    SVN_COMMIT is not consulted by this implementation; the same
    name is returned for every commit."""

    return 'refs/heads/TAG.FIXUP'
511 def process_tag_commit(self
, svn_commit
):
512 # FIXME: For now we create a fixup branch with the same name as
513 # the tag, then the tag. We never delete the fixup branch.
514 self
._mirror
.start_commit(svn_commit
.revnum
)
516 source_groups
= self
._get
_source
_groups
(svn_commit
)
517 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
518 (source_revnum
, source_lod
, cvs_symbols
) = source_groups
[0]
520 '%s will be created via a simple copy from %s:r%d'
521 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
523 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
524 self
._set
_symbol
(svn_commit
.symbol
, mark
)
525 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
526 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
529 '%s will be created via a fixup branch' % (svn_commit
.symbol
,)
532 fixup_branch_name
= self
.get_tag_fixup_branch_name(svn_commit
)
534 # Create the fixup branch (which might involve making more than
536 mark
= self
._process
_symbol
_commit
(
537 svn_commit
, fixup_branch_name
, source_groups
540 # Store the mark of the last commit to the fixup branch as the
542 self
._set
_symbol
(svn_commit
.symbol
, mark
)
543 self
.f
.write('reset %s\n' % (fixup_branch_name
,))
546 if self
.tie_tag_fixup_branches
:
547 source_lod
= source_groups
[0][1]
548 source_lod_git_branch
= \
549 'refs/heads/%s' % (getattr(source_lod
, 'name', 'master'),)
551 mark2
= self
._create
_commit
_mark
(source_lod
, svn_commit
.revnum
)
552 author
= self
._map
_author
(Ctx().username
)
553 log_msg
= self
._get
_log
_msg
_for
_ancestry
_tie
(svn_commit
)
555 self
.f
.write('commit %s\n' % (source_lod_git_branch
,))
556 self
.f
.write('mark :%d\n' % (mark2
,))
557 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
558 self
.f
.write('data %d\n' % (len(log_msg
),))
559 self
.f
.write('%s\n' % (log_msg
,))
568 self
._mirror
.end_commit()
570 def _get_log_msg_for_ancestry_tie(self
, svn_commit
):
571 return Ctx().text_wrapper
.fill(
572 Ctx().tie_tag_ancestry_message
% {
573 'symbol_name' : svn_commit
.symbol
.name
,
578 DVCSOutputOption
.cleanup(self
)
579 self
.revision_writer
.finish()
580 if self
.dump_filename
is not None: