cvs2git: Emit some more information in OutputPass.
[cvs2svn.git] / cvs2svn_lib / git_output_option.py
blob ff056c3529fa8413a2948e5afeb69d3f34b1997e
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
23 """
25 import bisect
26 import time
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.log import logger
30 from cvs2svn_lib.context import Ctx
31 from cvs2svn_lib.symbol import Trunk
32 from cvs2svn_lib.symbol import Branch
33 from cvs2svn_lib.symbol import Tag
34 from cvs2svn_lib.cvs_item import CVSSymbol
35 from cvs2svn_lib.dvcs_common import DVCSOutputOption
36 from cvs2svn_lib.dvcs_common import MirrorUpdater
37 from cvs2svn_lib.key_generator import KeyGenerator
class GitRevisionWriter(MirrorUpdater):
  """Base class for writing file changes as git-fast-import commands.

  Every callback first forwards to the MirrorUpdater implementation
  and then writes to the output stream that was passed to start().
  Subclasses choose how file contents are referenced by implementing
  _modify_file()."""

  def start(self, mirror, f):
    """Start the base class with MIRROR and remember F.

    F is the writable stream that receives the generated commands."""

    parent = super(GitRevisionWriter, self)
    parent.start(mirror)
    self.f = f

  def _modify_file(self, cvs_item, post_commit):
    """Write the command that sets the content of CVS_ITEM's file.

    Must be overridden; this base version always raises
    NotImplementedError."""

    raise NotImplementedError()

  def add_file(self, cvs_rev, post_commit):
    """Forward the addition to the base class, then write the file."""

    parent = super(GitRevisionWriter, self)
    parent.add_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def modify_file(self, cvs_rev, post_commit):
    """Forward the modification to the base class, then write the file."""

    parent = super(GitRevisionWriter, self)
    parent.modify_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def delete_file(self, cvs_rev, post_commit):
    """Forward the deletion to the base class, then write a 'D' line."""

    parent = super(GitRevisionWriter, self)
    parent.delete_file(cvs_rev, post_commit)
    doomed_path = cvs_rev.cvs_file.cvs_path
    self.f.write('D %s\n' % (doomed_path,))

  def branch_file(self, cvs_symbol):
    """Forward the branching to the base class, then write the file.

    The file is always written with post_commit=False."""

    parent = super(GitRevisionWriter, self)
    parent.branch_file(cvs_symbol)
    self._modify_file(cvs_symbol, post_commit=False)

  def finish(self):
    """Finish the base class and drop the reference to the stream.

    The stream itself is not closed here."""

    parent = super(GitRevisionWriter, self)
    parent.finish()
    del self.f
class GitRevisionMarkWriter(GitRevisionWriter):
  """Revision writer that refers to file contents by mark.

  Each changed file is written as an 'M' line whose data reference is
  ':<revision_reader_token>' rather than inline content."""

  def _modify_file(self, cvs_item, post_commit):
    """Write the 'M' line for CVS_ITEM.

    POST_COMMIT is part of the shared signature and is not used here."""

    cvs_file = cvs_item.cvs_file

    # Start from the non-executable mode and override when needed.
    mode = '100644'
    if cvs_file.executable:
      mode = '100755'

    fields = (mode, cvs_item.revision_reader_token, cvs_file.cvs_path,)
    self.f.write('M %s :%d %s\n' % fields)
class GitRevisionInlineWriter(GitRevisionWriter):
  """Revision writer that embeds file contents in the output stream.

  Contents are fetched from the revision reader supplied to the
  constructor and written as 'data' sections directly after each
  'M ... inline ...' line."""

  def __init__(self, revision_reader):
    """Remember REVISION_READER, the object used to fetch file contents."""

    self.revision_reader = revision_reader

  def register_artifacts(self, which_pass):
    """Register artifacts for this writer and for its revision reader."""

    GitRevisionWriter.register_artifacts(self, which_pass)
    reader = self.revision_reader
    reader.register_artifacts(which_pass)

  def start(self, mirror, f):
    """Start this writer on MIRROR and F, then start the revision reader."""

    GitRevisionWriter.start(self, mirror, f)
    reader = self.revision_reader
    reader.start()

  def _modify_file(self, cvs_item, post_commit):
    """Write the 'M' line for CVS_ITEM followed by its full text.

    POST_COMMIT is part of the shared signature and is not used here."""

    cvs_file = cvs_item.cvs_file

    # Start from the non-executable mode and override when needed.
    mode = '100644'
    if cvs_file.executable:
      mode = '100755'

    self.f.write('M %s inline %s\n' % (mode, cvs_file.cvs_path,))

    # A symbol has no content of its own; use the revision it was
    # created from.
    cvs_rev = cvs_item
    if isinstance(cvs_item, CVSSymbol):
      cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)

    # FIXME: We have to decide what to do about keyword substitution
    # and eol_style here:
    fulltext = self.revision_reader.get_content(cvs_rev)

    # The length header, the raw content and the trailing newline are
    # written as three separate writes.
    size_header = 'data %d\n' % (len(fulltext),)
    self.f.write(size_header)
    self.f.write(fulltext)
    self.f.write('\n')

  def finish(self):
    """Finish this writer, then finish the revision reader."""

    GitRevisionWriter.finish(self)
    reader = self.revision_reader
    reader.finish()
class GitOutputOption(DVCSOutputOption):
  """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string) the name of the file to which the
        git-fast-import commands for defining revisions will be
        written.

    revision_writer -- the GitRevisionWriter used to write the file
        changes that belong to each commit.

    author_transforms -- a map from CVS author names to git full name
        and email address. See
        DVCSOutputOption.normalize_author_transforms() for information
        about the form of this parameter.

    tie_tag_fixup_branches -- (boolean) whether a tag fixup branch is
        pseudo-merged back into its source branch once the tag has
        been set.

  """

  name = "Git"

  # The first mark number used for git-fast-import commit marks. This
  # value needs to be large to avoid conflicts with blob marks.
  _first_commit_mark = 1000000000

  def __init__(
        self, dump_filename, revision_writer,
        author_transforms=None,
        tie_tag_fixup_branches=False,
        ):
    """Constructor.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written. (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address. All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
    fixup branch, it should be pseudo-merged (ancestry linked but no
    content changes) back into its source branch, to dispose of the
    open head.

    """

    DVCSOutputOption.__init__(self)
    self.dump_filename = dump_filename
    self.revision_writer = revision_writer

    self.author_transforms = self.normalize_author_transforms(
        author_transforms
        )

    self.tie_tag_fixup_branches = tie_tag_fixup_branches

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

  def register_artifacts(self, which_pass):
    """Register artifacts for this option and for its revision writer."""

    DVCSOutputOption.register_artifacts(self, which_pass)
    self.revision_writer.register_artifacts(which_pass)

  def check_symbols(self, symbol_map):
    """Accept every symbol; no git-specific checks are implemented."""

    # FIXME: What constraints does git impose on symbols?
    pass

  def setup(self, svn_rev_count):
    """Open the dump file, reset the mark bookkeeping and start the writer.

    The file opened here is closed again by cleanup()."""

    DVCSOutputOption.setup(self, svn_rev_count)
    self.f = open(self.dump_filename, 'wb')

    # The youngest revnum that has been committed so far:
    self._youngest = 0

    # A map {lod : [(revnum, mark)]} giving each of the revision
    # numbers in which there was a commit to lod, and the mark active
    # at the end of the revnum.
    self._marks = {}

    self.revision_writer.start(self._mirror, self.f)

  def _create_commit_mark(self, lod, revnum):
    """Generate a new mark, record it for LOD at REVNUM, and return it."""

    mark = self._mark_generator.gen_id()
    self._set_lod_mark(lod, revnum, mark)
    return mark

  def _set_lod_mark(self, lod, revnum, mark):
    """Record MARK as the status of LOD for REVNUM.

    If there is already an entry for REVNUM, overwrite it. If not,
    append a new entry to the self._marks list for LOD."""

    assert revnum >= self._youngest
    entry = (revnum, mark)
    try:
      modifications = self._marks[lod]
    except KeyError:
      # This LOD hasn't appeared before; create a new list and add the
      # entry:
      self._marks[lod] = [entry]
    else:
      # A record exists, so it necessarily has at least one element:
      if modifications[-1][0] == revnum:
        modifications[-1] = entry
      else:
        modifications.append(entry)
    self._youngest = revnum

  def _get_author(self, svn_commit):
    """Return the author to be used for SVN_COMMIT.

    Return the author as a UTF-8 string in the form needed by git
    fast-import; that is, 'name <email>'."""

    cvs_author = svn_commit.get_author()
    return self._map_author(cvs_author)

  def _map_author(self, cvs_author):
    """Return the git author string for CVS_AUTHOR.

    Authors without an entry in self.author_transforms are rendered as
    '<cvs_author> <>'."""

    return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

  @staticmethod
  def _get_log_msg(svn_commit):
    """Return the log message of SVN_COMMIT unchanged."""

    return svn_commit.get_log_msg()

  def process_initial_project_commit(self, svn_commit):
    """Open and close the mirror commit without writing any output."""

    self._mirror.start_commit(svn_commit.revnum)
    self._mirror.end_commit()

  def process_primary_commit(self, svn_commit):
    """Write SVN_COMMIT as a single commit on the one LOD it touches.

    Raise InternalError if the CVS items of SVN_COMMIT do not all
    belong to exactly one LOD."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    lods = set(cvs_rev.lod for cvs_rev in svn_commit.get_cvs_items())
    if len(lods) != 1:
      raise InternalError('Commit affects %d LODs' % (len(lods),))
    lod = lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    if isinstance(lod, Trunk):
      # FIXME: is this correct?:
      self.f.write('commit refs/heads/master\n')
    else:
      self.f.write('commit refs/heads/%s\n' % (lod.name,))
    mark = self._create_commit_mark(lod, svn_commit.revnum)
    logger.normal(
        'Writing commit r%d on %s (mark :%d)'
        % (svn_commit.revnum, lod, mark,)
        )
    self.f.write('mark :%d\n' % (mark,))
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    for cvs_rev in svn_commit.get_cvs_items():
      self.revision_writer.process_revision(cvs_rev, post_commit=False)

    self.f.write('\n')
    self._mirror.end_commit()

  def process_post_commit(self, svn_commit):
    """Write SVN_COMMIT as a commit on master merged from its source LOD.

    Raise InternalError if the CVS revisions of SVN_COMMIT do not all
    come from exactly one LOD."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    source_lods = set(cvs_rev.lod for cvs_rev in svn_commit.cvs_revs)
    if len(source_lods) != 1:
      raise InternalError('Commit is from %d LODs' % (len(source_lods),))
    source_lod = source_lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    # FIXME: is this correct?:
    self.f.write('commit refs/heads/master\n')
    # NOTE(review): the mark is recorded under the key None, not under
    # a Trunk object; confirm that this is intended.
    mark = self._create_commit_mark(None, svn_commit.revnum)
    # The target is always refs/heads/master, and no LOD object for it
    # is bound in this method, so the message names Trunk directly.
    logger.normal(
        'Writing post-commit r%d on Trunk (mark :%d)'
        % (svn_commit.revnum, mark,)
        )
    self.f.write('mark :%d\n' % (mark,))
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    self.f.write(
        'merge :%d\n'
        % (self._get_source_mark(source_lod, svn_commit.revnum),)
        )
    for cvs_rev in svn_commit.cvs_revs:
      self.revision_writer.process_revision(cvs_rev, post_commit=True)

    self.f.write('\n')
    self._mirror.end_commit()

  def _get_source_mark(self, source_lod, revnum):
    """Return the mark active on SOURCE_LOD at the end of REVNUM.

    Raise KeyError if no mark was ever recorded for SOURCE_LOD."""

    modifications = self._marks[source_lod]
    # (revnum + 1,) sorts before every (revnum + 1, mark) entry, so the
    # element just before the insertion point is the last entry whose
    # revnum is <= REVNUM.
    # NOTE(review): if every entry is newer than REVNUM the index is -1
    # and the newest entry is returned; confirm that callers never ask
    # for a revision older than the first mark of the LOD.
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    (revnum, mark) = modifications[i]
    return mark

  def describe_lod_to_user(self, lod):
    """This needs to make sense to users of the fastimported result."""

    if isinstance(lod, Trunk):
      return 'master'
    else:
      return lod.name

  def _describe_commit(self, svn_commit, lod):
    """Return a one-line description of SVN_COMMIT on LOD.

    The description consists of the user-visible LOD name, the commit
    date in UTC, the mapped author (without an empty ' <>' email
    suffix) and the first line of the log message in single quotes."""

    author = self._map_author(svn_commit.get_author())
    if author.endswith(" <>"):
      author = author[:-3]
    date = time.strftime(
        "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
        )
    # Keep only the text before the first newline:
    log_msg = svn_commit.get_log_msg().split('\n', 1)[0]
    return "%s %s %s '%s'" % (
        self.describe_lod_to_user(lod), date, author, log_msg,)

  def _process_symbol_commit(self, svn_commit, git_branch, source_groups):
    """Write a commit on GIT_BRANCH that fills in SVN_COMMIT.symbol.

    SOURCE_GROUPS is a sequence of (source_revnum, source_lod,
    cvs_symbols) tuples. Return the mark of the commit written.

    Raise InternalError if the symbol is being created for the first
    time and the mirror has no directory for the primary source."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    # There are two distinct cases we need to care for here:
    #  1. initial creation of a LOD
    #  2. fixup of an existing LOD to include more files, because the LOD in
    #     CVS was created piecemeal over time, with intervening commits

    # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
    # might be technically more correct (though _get_lod_history is currently
    # underscore-private)
    is_initial_lod_creation = svn_commit.symbol not in self._marks

    # Create the mark, only after the check above
    mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)

    if is_initial_lod_creation:
      # Get the primary parent
      p_source_revnum, p_source_lod, p_cvs_symbols = source_groups[0]
      try:
        p_source_node = self._mirror.get_old_lod_directory(
            p_source_lod, p_source_revnum
            )
      except KeyError:
        raise InternalError('Source %r does not exist' % (p_source_lod,))
      cvs_files_to_delete = set(self._get_all_files(p_source_node))

      # Files that are part of the symbol must survive the sprout:
      for (source_revnum, source_lod, cvs_symbols,) in source_groups:
        for cvs_symbol in cvs_symbols:
          cvs_files_to_delete.discard(cvs_symbol.cvs_file)

      # The primary parent is reported as the sprout source, so only
      # the remaining groups are listed as cherrypicks.
      cherrypick_groups = source_groups[1:]
    else:
      cherrypick_groups = source_groups

    # Write a trailer to the log message which describes the cherrypicks that
    # make up this symbol creation.
    log_msg += "\n"
    if is_initial_lod_creation:
      sprout_description = self._describe_commit(
          Ctx()._persistence_manager.get_svn_commit(p_source_revnum),
          p_source_lod
          )
      log_msg += "\nSprout from %s" % (sprout_description,)
    for (source_revnum, source_lod, cvs_symbols,) in cherrypick_groups:
      cherrypick_description = self._describe_commit(
          Ctx()._persistence_manager.get_svn_commit(source_revnum),
          source_lod
          )
      log_msg += "\nCherrypick from %s:" % (cherrypick_description,)
      for cvs_path in sorted(
            cvs_symbol.cvs_file.cvs_path for cvs_symbol in cvs_symbols
            ):
        log_msg += "\n %s" % (cvs_path,)
    if is_initial_lod_creation:
      if cvs_files_to_delete:
        log_msg += "\nDelete:"
        for cvs_path in sorted(
              cvs_file.cvs_path for cvs_file in cvs_files_to_delete
              ):
          log_msg += "\n %s" % (cvs_path,)

    self.f.write('commit %s\n' % (git_branch,))
    self.f.write('mark :%d\n' % (mark,))
    self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))

    # Only record actual DVCS ancestry for the primary sprout parent,
    # all the rest are effectively cherrypicks.
    if is_initial_lod_creation:
      self.f.write(
          'from :%d\n'
          % (self._get_source_mark(p_source_lod, p_source_revnum),)
          )

    for (source_revnum, source_lod, cvs_symbols,) in source_groups:
      for cvs_symbol in cvs_symbols:
        self.revision_writer.branch_file(cvs_symbol)

    if is_initial_lod_creation:
      for cvs_file in cvs_files_to_delete:
        self.f.write('D %s\n' % (cvs_file.cvs_path,))

    self.f.write('\n')
    return mark

  def process_branch_commit(self, svn_commit):
    """Create or fill in the branch SVN_COMMIT.symbol.

    A branch that is a simple copy of one source is created by pointing
    its ref at the source's mark; any other branch is built with a
    fixup commit on refs/heads/<branch name>."""

    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      (source_revnum, source_lod, cvs_symbols) = source_groups[0]
      logger.debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      logger.debug(
          '%s will be created via fixup commit(s)' % (svn_commit.symbol,)
          )
      self._process_symbol_commit(
          svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
          source_groups,
          )

    self._mirror.end_commit()

  def _set_symbol(self, symbol, mark):
    """Write a 'reset' that points the ref for SYMBOL at MARK.

    Branches are written under refs/heads and tags under refs/tags.
    Raise InternalError for any other kind of symbol."""

    if isinstance(symbol, Branch):
      category = 'heads'
    elif isinstance(symbol, Tag):
      category = 'tags'
    else:
      raise InternalError()
    self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
    self.f.write('from :%d\n' % (mark,))

  def get_tag_fixup_branch_name(self, svn_commit):
    """Return the ref used for tag fixup branches.

    SVN_COMMIT is not used by this implementation."""

    # The branch name to use for the "tag fixup branches". The
    # git-fast-import documentation suggests using 'TAG_FIXUP'
    # (outside of the refs/heads namespace), but this is currently
    # broken. Use a name containing '.', which is not allowed in CVS
    # symbols, to avoid conflicts (though of course a conflict could
    # still result if the user requests symbol transformations).
    return 'refs/heads/TAG.FIXUP'

  def process_tag_commit(self, svn_commit):
    """Create the tag SVN_COMMIT.symbol.

    A tag that is a simple copy of one source is created by pointing
    its ref at the source's mark. Any other tag is built on the tag
    fixup branch; if self.tie_tag_fixup_branches is set, an extra
    commit that merges the fixup commit is then written to the branch
    of the first source LOD."""

    # FIXME: For now we create a fixup branch with the same name as
    # the tag, then the tag. We never delete the fixup branch.
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      (source_revnum, source_lod, cvs_symbols) = source_groups[0]
      logger.debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      logger.debug(
          '%s will be created via a fixup branch' % (svn_commit.symbol,)
          )

      fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

      # Create the fixup branch (which might involve making more than
      # one commit):
      mark = self._process_symbol_commit(
          svn_commit, fixup_branch_name, source_groups
          )

      # Store the mark of the last commit to the fixup branch as the
      # value of the tag:
      self._set_symbol(svn_commit.symbol, mark)
      self.f.write('reset %s\n' % (fixup_branch_name,))
      self.f.write('\n')

      if self.tie_tag_fixup_branches:
        source_lod = source_groups[0][1]
        # A source LOD without a 'name' attribute is written to
        # refs/heads/master:
        source_lod_git_branch = \
            'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

        mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
        author = self._map_author(Ctx().username)
        log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

        self.f.write('commit %s\n' % (source_lod_git_branch,))
        self.f.write('mark :%d\n' % (mark2,))
        self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

        self.f.write(
            'merge :%d\n'
            % (mark,)
            )

        self.f.write('\n')

    self._mirror.end_commit()

  def _get_log_msg_for_ancestry_tie(self, svn_commit):
    """Return the wrapped log message for an ancestry-tie commit.

    Ctx().tie_tag_ancestry_message is interpolated with the key
    'symbol_name' and filled using Ctx().text_wrapper."""

    return Ctx().text_wrapper.fill(
        Ctx().tie_tag_ancestry_message % {
            'symbol_name' : svn_commit.symbol.name,
            }
        )

  def cleanup(self):
    """Finish the revision writer, then close and forget the dump file."""

    DVCSOutputOption.cleanup(self)
    self.revision_writer.finish()
    self.f.close()
    del self.f