1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
30 from cvs2svn_lib
import config
31 from cvs2svn_lib
.common
import InternalError
32 from cvs2svn_lib
.log
import logger
33 from cvs2svn_lib
.context
import Ctx
34 from cvs2svn_lib
.symbol
import Trunk
35 from cvs2svn_lib
.symbol
import Branch
36 from cvs2svn_lib
.symbol
import Tag
37 from cvs2svn_lib
.cvs_item
import CVSSymbol
38 from cvs2svn_lib
.dvcs_common
import DVCSOutputOption
39 from cvs2svn_lib
.dvcs_common
import MirrorUpdater
40 from cvs2svn_lib
.key_generator
import KeyGenerator
41 from cvs2svn_lib
.artifact_manager
import artifact_manager
class GitRevisionWriter(MirrorUpdater):
    """Base class for writers that emit per-file git-fast-import commands.

    Every mirror callback first delegates to MirrorUpdater and then
    writes the matching command for the file.  How a file's content is
    emitted is left to subclasses, which must override _modify_file().
    """

    def start(self, mirror, f):
        """Begin writing.

        MIRROR is handed to MirrorUpdater.start(); F is the writable
        stream that receives the generated commands.
        """
        MirrorUpdater.start(self, mirror)
        # Keep the stream: delete_file() below and the subclasses'
        # _modify_file() implementations write to self.f.
        self.f = f

    def _modify_file(self, cvs_item, post_commit):
        """Emit the command that sets the content of CVS_ITEM's file.

        Abstract; subclasses must override.
        """
        raise NotImplementedError()

    def add_file(self, cvs_rev, post_commit):
        """Record the addition of CVS_REV in the mirror, then emit it."""
        MirrorUpdater.add_file(self, cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def modify_file(self, cvs_rev, post_commit):
        """Record the change of CVS_REV in the mirror, then emit it."""
        MirrorUpdater.modify_file(self, cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def delete_file(self, cvs_rev, post_commit):
        """Record the deletion in the mirror and write a 'D <path>' line."""
        MirrorUpdater.delete_file(self, cvs_rev, post_commit)
        self.f.write('D %s\n' % (cvs_rev.cvs_file.cvs_path,))

    def branch_file(self, cvs_symbol):
        """Record CVS_SYMBOL in the mirror, then emit its file content."""
        MirrorUpdater.branch_file(self, cvs_symbol)
        self._modify_file(cvs_symbol, post_commit=False)

    def finish(self):
        """Finish writing by delegating to MirrorUpdater.finish()."""
        # This call must live in its own method: subclasses invoke
        # GitRevisionWriter.finish(self), and it must not run as part of
        # every branch_file() call.
        MirrorUpdater.finish(self)
74 class GitRevisionMarkWriter(GitRevisionWriter
):
75 def register_artifacts(self
, which_pass
):
76 GitRevisionWriter
.register_artifacts(self
, which_pass
)
77 if Ctx().revision_collector
.blob_filename
is None:
78 artifact_manager
.register_temp_file_needed(
79 config
.GIT_BLOB_DATAFILE
, which_pass
,
82 def start(self
, mirror
, f
):
83 GitRevisionWriter
.start(self
, mirror
, f
)
84 if Ctx().revision_collector
.blob_filename
is None:
85 # The revision collector wrote the blobs to a temporary file;
87 logger
.normal('Copying blob data to output')
89 artifact_manager
.get_temp_file(config
.GIT_BLOB_DATAFILE
), 'rb',
91 shutil
.copyfileobj(blobf
, f
)
94 def _modify_file(self
, cvs_item
, post_commit
):
95 if cvs_item
.cvs_file
.executable
:
102 % (mode
, cvs_item
.revision_reader_token
,
103 cvs_item
.cvs_file
.cvs_path
,)
107 class GitRevisionInlineWriter(GitRevisionWriter
):
108 def __init__(self
, revision_reader
):
109 self
.revision_reader
= revision_reader
def register_artifacts(self, which_pass):
    """Register the artifacts needed by the base writer and by the reader.

    WHICH_PASS is forwarded unchanged to both.
    """
    GitRevisionWriter.register_artifacts(self, which_pass)
    reader = self.revision_reader
    reader.register_artifacts(which_pass)
def start(self, mirror, f):
    """Start the base writer with MIRROR and F, then start the reader."""
    GitRevisionWriter.start(self, mirror, f)
    reader = self.revision_reader
    reader.start()
119 def _modify_file(self
, cvs_item
, post_commit
):
120 if cvs_item
.cvs_file
.executable
:
127 % (mode
, cvs_item
.cvs_file
.cvs_path
,)
130 if isinstance(cvs_item
, CVSSymbol
):
131 cvs_rev
= cvs_item
.get_cvs_revision_source(Ctx()._cvs
_items
_db
)
135 # FIXME: We have to decide what to do about keyword substitution
136 # and eol_style here:
137 fulltext
= self
.revision_reader
.get_content(cvs_rev
)
139 self
.f
.write('data %d\n' % (len(fulltext
),))
140 self
.f
.write(fulltext
)
144 GitRevisionWriter
.finish(self
)
145 self
.revision_reader
.finish()
148 class GitOutputOption(DVCSOutputOption
):
149 """An OutputOption that outputs to a git-fast-import formatted file.
153 dump_filename -- (string or None) the name of the file to which
154 the git-fast-import commands for defining revisions will be
155 written. If None, the data will be written to stdout.
157 author_transforms -- a map from CVS author names to git full name
158 and email address. See
159 DVCSOutputOption.normalize_author_transforms() for information
160 about the form of this parameter.
166 # The first mark number used for git-fast-import commit marks. This
167 # value needs to be large to avoid conflicts with blob marks.
168 _first_commit_mark
= 1000000000
171 self
, revision_writer
,
173 author_transforms
=None,
174 tie_tag_fixup_branches
=False,
178 REVISION_WRITER is a GitRevisionWriter that is used to output
179 either the content of revisions or a mark that was previously used
182 DUMP_FILENAME is the name of the file to which the git-fast-import
183 commands for defining revisions should be written. (Please note
184 that depending on the style of revision writer, the actual file
185 contents might not be written to this file.) If it is None, then
186 the output is written to stdout.
188 AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
189 CVS author names to git full name and email address. All of the
190 contents should either be Unicode strings or 8-bit strings encoded
193 TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
194 fixup branch, it should be pseudo-merged (ancestry linked but no
195 content changes) back into its source branch, to dispose of the
199 DVCSOutputOption
.__init
__(self
)
200 self
.dump_filename
= dump_filename
201 self
.revision_writer
= revision_writer
203 self
.author_transforms
= self
.normalize_author_transforms(
207 self
.tie_tag_fixup_branches
= tie_tag_fixup_branches
209 self
._mark
_generator
= KeyGenerator(GitOutputOption
._first
_commit
_mark
)
def register_artifacts(self, which_pass):
    """Register artifacts for this output option and its revision writer.

    WHICH_PASS is forwarded unchanged to both.
    """
    DVCSOutputOption.register_artifacts(self, which_pass)
    writer = self.revision_writer
    writer.register_artifacts(which_pass)
215 def check_symbols(self
, symbol_map
):
216 # FIXME: What constraints does git impose on symbols?
219 def setup(self
, svn_rev_count
):
220 DVCSOutputOption
.setup(self
, svn_rev_count
)
221 if self
.dump_filename
is None:
224 self
.f
= open(self
.dump_filename
, 'wb')
226 # The youngest revnum that has been committed so far:
229 # A map {lod : [(revnum, mark)]} giving each of the revision
230 # numbers in which there was a commit to lod, and the mark active
231 # at the end of the revnum.
234 self
.revision_writer
.start(self
._mirror
, self
.f
)
236 def _create_commit_mark(self
, lod
, revnum
):
237 mark
= self
._mark
_generator
.gen_id()
238 self
._set
_lod
_mark
(lod
, revnum
, mark
)
241 def _set_lod_mark(self
, lod
, revnum
, mark
):
242 """Record MARK as the status of LOD for REVNUM.
244 If there is already an entry for REVNUM, overwrite it. If not,
245 append a new entry to the self._marks list for LOD."""
247 assert revnum
>= self
._youngest
248 entry
= (revnum
, mark
)
250 modifications
= self
._marks
[lod
]
252 # This LOD hasn't appeared before; create a new list and add the
254 self
._marks
[lod
] = [entry
]
256 # A record exists, so it necessarily has at least one element:
257 if modifications
[-1][0] == revnum
:
258 modifications
[-1] = entry
260 modifications
.append(entry
)
261 self
._youngest
= revnum
263 def _get_author(self
, svn_commit
):
264 """Return the author to be used for SVN_COMMIT.
266 Return the author as a UTF-8 string in the form needed by git
267 fast-import; that is, 'name <email>'."""
269 cvs_author
= svn_commit
.get_author()
270 return self
._map
_author
(cvs_author
)
272 def _map_author(self
, cvs_author
):
273 return self
.author_transforms
.get(cvs_author
, "%s <>" % (cvs_author
,))
276 def _get_log_msg(svn_commit
):
277 return svn_commit
.get_log_msg()
279 def process_initial_project_commit(self
, svn_commit
):
280 self
._mirror
.start_commit(svn_commit
.revnum
)
281 self
._mirror
.end_commit()
283 def process_primary_commit(self
, svn_commit
):
284 author
= self
._get
_author
(svn_commit
)
285 log_msg
= self
._get
_log
_msg
(svn_commit
)
288 for cvs_rev
in svn_commit
.get_cvs_items():
289 lods
.add(cvs_rev
.lod
)
291 raise InternalError('Commit affects %d LODs' % (len(lods
),))
294 self
._mirror
.start_commit(svn_commit
.revnum
)
295 if isinstance(lod
, Trunk
):
296 # FIXME: is this correct?:
297 self
.f
.write('commit refs/heads/master\n')
299 self
.f
.write('commit refs/heads/%s\n' % (lod
.name
,))
300 mark
= self
._create
_commit
_mark
(lod
, svn_commit
.revnum
)
302 'Writing commit r%d on %s (mark :%d)'
303 % (svn_commit
.revnum
, lod
, mark
,)
305 self
.f
.write('mark :%d\n' % (mark
,))
307 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
309 self
.f
.write('data %d\n' % (len(log_msg
),))
310 self
.f
.write('%s\n' % (log_msg
,))
311 for cvs_rev
in svn_commit
.get_cvs_items():
312 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=False)
315 self
._mirror
.end_commit()
317 def process_post_commit(self
, svn_commit
):
318 author
= self
._get
_author
(svn_commit
)
319 log_msg
= self
._get
_log
_msg
(svn_commit
)
322 for cvs_rev
in svn_commit
.cvs_revs
:
323 source_lods
.add(cvs_rev
.lod
)
324 if len(source_lods
) != 1:
325 raise InternalError('Commit is from %d LODs' % (len(source_lods
),))
326 source_lod
= source_lods
.pop()
328 self
._mirror
.start_commit(svn_commit
.revnum
)
329 # FIXME: is this correct?:
330 self
.f
.write('commit refs/heads/master\n')
331 mark
= self
._create
_commit
_mark
(None, svn_commit
.revnum
)
333 'Writing post-commit r%d on Trunk (mark :%d)'
334 % (svn_commit
.revnum
, mark
,)
336 self
.f
.write('mark :%d\n' % (mark
,))
338 'committer %s %d +0000\n' % (author
, svn_commit
.date
,)
340 self
.f
.write('data %d\n' % (len(log_msg
),))
341 self
.f
.write('%s\n' % (log_msg
,))
344 % (self
._get
_source
_mark
(source_lod
, svn_commit
.revnum
),)
346 for cvs_rev
in svn_commit
.cvs_revs
:
347 self
.revision_writer
.process_revision(cvs_rev
, post_commit
=True)
350 self
._mirror
.end_commit()
352 def _get_source_mark(self
, source_lod
, revnum
):
353 """Return the mark active on SOURCE_LOD at the end of REVNUM."""
355 modifications
= self
._marks
[source_lod
]
356 i
= bisect
.bisect_left(modifications
, (revnum
+ 1,)) - 1
357 (revnum
, mark
) = modifications
[i
]
360 def describe_lod_to_user(self
, lod
):
361 """This needs to make sense to users of the fastimported result."""
362 if isinstance(lod
, Trunk
):
367 def _describe_commit(self
, svn_commit
, lod
):
368 author
= self
._map
_author
(svn_commit
.get_author())
369 if author
.endswith(" <>"):
371 date
= time
.strftime(
372 "%Y-%m-%d %H:%M:%S UTC", time
.gmtime(svn_commit
.date
)
374 log_msg
= svn_commit
.get_log_msg()
375 if log_msg
.find('\n') != -1:
376 log_msg
= log_msg
[:log_msg
.index('\n')]
377 return "%s %s %s '%s'" % (
378 self
.describe_lod_to_user(lod
), date
, author
, log_msg
,)
380 def _process_symbol_commit(self
, svn_commit
, git_branch
, source_groups
):
381 author
= self
._get
_author
(svn_commit
)
382 log_msg
= self
._get
_log
_msg
(svn_commit
)
384 # There are two distinct cases we need to care for here:
385 # 1. initial creation of a LOD
386 # 2. fixup of an existing LOD to include more files, because the LOD in
387 # CVS was created piecemeal over time, with intervening commits
389 # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
390 # might be technically more correct (though _get_lod_history is currently
391 # underscore-private)
392 is_initial_lod_creation
= svn_commit
.symbol
not in self
._marks
394 # Create the mark, only after the check above
395 mark
= self
._create
_commit
_mark
(svn_commit
.symbol
, svn_commit
.revnum
)
397 if is_initial_lod_creation
:
398 # Get the primary parent
399 p_source_revnum
, p_source_lod
, p_cvs_symbols
= source_groups
[0]
401 p_source_node
= self
._mirror
.get_old_lod_directory(
402 p_source_lod
, p_source_revnum
405 raise InternalError('Source %r does not exist' % (p_source_lod
,))
406 cvs_files_to_delete
= set(self
._get
_all
_files
(p_source_node
))
408 for (source_revnum
, source_lod
, cvs_symbols
,) in source_groups
:
409 for cvs_symbol
in cvs_symbols
:
410 cvs_files_to_delete
.discard(cvs_symbol
.cvs_file
)
412 self
.f
.write('commit %s\n' % (git_branch
,))
413 self
.f
.write('mark :%d\n' % (mark
,))
414 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
415 self
.f
.write('data %d\n' % (len(log_msg
),))
416 self
.f
.write('%s\n' % (log_msg
,))
418 # Only record actual DVCS ancestry for the primary sprout parent,
419 # all the rest are effectively cherrypicks.
420 if is_initial_lod_creation
:
423 % (self
._get
_source
_mark
(p_source_lod
, p_source_revnum
),)
426 for (source_revnum
, source_lod
, cvs_symbols
,) in source_groups
:
427 for cvs_symbol
in cvs_symbols
:
428 self
.revision_writer
.branch_file(cvs_symbol
)
430 if is_initial_lod_creation
:
431 for cvs_file
in cvs_files_to_delete
:
432 self
.f
.write('D %s\n' % (cvs_file
.cvs_path
,))
437 def process_branch_commit(self
, svn_commit
):
438 self
._mirror
.start_commit(svn_commit
.revnum
)
440 source_groups
= self
._get
_source
_groups
(svn_commit
)
441 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
442 (source_revnum
, source_lod
, cvs_symbols
) = source_groups
[0]
444 '%s will be created via a simple copy from %s:r%d'
445 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
447 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
448 self
._set
_symbol
(svn_commit
.symbol
, mark
)
449 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
450 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
453 '%s will be created via fixup commit(s)' % (svn_commit
.symbol
,)
455 self
._process
_symbol
_commit
(
456 svn_commit
, 'refs/heads/%s' % (svn_commit
.symbol
.name
,),
460 self
._mirror
.end_commit()
462 def _set_symbol(self
, symbol
, mark
):
463 if isinstance(symbol
, Branch
):
465 elif isinstance(symbol
, Tag
):
468 raise InternalError()
469 self
.f
.write('reset refs/%s/%s\n' % (category
, symbol
.name
,))
470 self
.f
.write('from :%d\n' % (mark
,))
472 def get_tag_fixup_branch_name(self
, svn_commit
):
473 # The branch name to use for the "tag fixup branches". The
474 # git-fast-import documentation suggests using 'TAG_FIXUP'
475 # (outside of the refs/heads namespace), but this is currently
476 # broken. Use a name containing '.', which is not allowed in CVS
477 # symbols, to avoid conflicts (though of course a conflict could
478 # still result if the user requests symbol transformations).
479 return 'refs/heads/TAG.FIXUP'
481 def process_tag_commit(self
, svn_commit
):
482 # FIXME: For now we create a fixup branch with the same name as
483 # the tag, then the tag. We never delete the fixup branch.
484 self
._mirror
.start_commit(svn_commit
.revnum
)
486 source_groups
= self
._get
_source
_groups
(svn_commit
)
487 if self
._is
_simple
_copy
(svn_commit
, source_groups
):
488 (source_revnum
, source_lod
, cvs_symbols
) = source_groups
[0]
490 '%s will be created via a simple copy from %s:r%d'
491 % (svn_commit
.symbol
, source_lod
, source_revnum
,)
493 mark
= self
._get
_source
_mark
(source_lod
, source_revnum
)
494 self
._set
_symbol
(svn_commit
.symbol
, mark
)
495 self
._mirror
.copy_lod(source_lod
, svn_commit
.symbol
, source_revnum
)
496 self
._set
_lod
_mark
(svn_commit
.symbol
, svn_commit
.revnum
, mark
)
499 '%s will be created via a fixup branch' % (svn_commit
.symbol
,)
502 fixup_branch_name
= self
.get_tag_fixup_branch_name(svn_commit
)
504 # Create the fixup branch (which might involve making more than
506 mark
= self
._process
_symbol
_commit
(
507 svn_commit
, fixup_branch_name
, source_groups
510 # Store the mark of the last commit to the fixup branch as the
512 self
._set
_symbol
(svn_commit
.symbol
, mark
)
513 self
.f
.write('reset %s\n' % (fixup_branch_name
,))
516 if self
.tie_tag_fixup_branches
:
517 source_lod
= source_groups
[0][1]
518 source_lod_git_branch
= \
519 'refs/heads/%s' % (getattr(source_lod
, 'name', 'master'),)
521 mark2
= self
._create
_commit
_mark
(source_lod
, svn_commit
.revnum
)
522 author
= self
._map
_author
(Ctx().username
)
523 log_msg
= self
._get
_log
_msg
_for
_ancestry
_tie
(svn_commit
)
525 self
.f
.write('commit %s\n' % (source_lod_git_branch
,))
526 self
.f
.write('mark :%d\n' % (mark2
,))
527 self
.f
.write('committer %s %d +0000\n' % (author
, svn_commit
.date
,))
528 self
.f
.write('data %d\n' % (len(log_msg
),))
529 self
.f
.write('%s\n' % (log_msg
,))
538 self
._mirror
.end_commit()
540 def _get_log_msg_for_ancestry_tie(self
, svn_commit
):
541 return Ctx().text_wrapper
.fill(
542 Ctx().tie_tag_ancestry_message
% {
543 'symbol_name' : svn_commit
.symbol
.name
,
548 DVCSOutputOption
.cleanup(self
)
549 self
.revision_writer
.finish()
550 if self
.dump_filename
is not None: