Allow DVCS author_transforms values to be specified as pre-formatted strings.
[cvs2svn.git] / cvs2svn_lib / git_output_option.py
blob9fc7e8246e60b1e5877a85c1d688cf9186cd12dd
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
23 """
25 import bisect
26 import time
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.common import FatalError
30 from cvs2svn_lib.log import Log
31 from cvs2svn_lib.context import Ctx
32 from cvs2svn_lib.symbol import Trunk
33 from cvs2svn_lib.symbol import Branch
34 from cvs2svn_lib.symbol import Tag
35 from cvs2svn_lib.cvs_item import CVSSymbol
36 from cvs2svn_lib.dvcs_common import DVCSOutputOption
37 from cvs2svn_lib.dvcs_common import MirrorUpdater
38 from cvs2svn_lib.key_generator import KeyGenerator
class ExpectedDirectoryError(Exception):
    """Signal that a file was found where a directory was expected."""
class ExpectedFileError(Exception):
    """Signal that a directory was found where a file was expected."""
class GitRevisionWriter(MirrorUpdater):
    """Base class for writers that emit file changes as fast-import commands.

    Every update is first passed on to the superclass and is then
    written to the output file that was handed to start().  Concrete
    subclasses must implement _modify_file(), which emits the command
    for a file that was added, modified or branched.
    """

    def start(self, mirror, f):
        """Start working on MIRROR, sending output to the file-like object F."""
        super(GitRevisionWriter, self).start(mirror)
        self.f = f

    def _modify_file(self, cvs_item, post_commit):
        """Emit the command that sets the content of CVS_ITEM's file.

        Abstract: this implementation always raises NotImplementedError."""
        raise NotImplementedError()

    def add_file(self, cvs_rev, post_commit):
        """Register the addition of CVS_REV, then emit its content command."""
        super(GitRevisionWriter, self).add_file(cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def modify_file(self, cvs_rev, post_commit):
        """Register the change in CVS_REV, then emit its content command."""
        super(GitRevisionWriter, self).modify_file(cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def delete_file(self, cvs_rev, post_commit):
        """Register the deletion in CVS_REV and write a 'D' command for it."""
        super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit)
        deleted_path = cvs_rev.cvs_file.cvs_path
        self.f.write('D %s\n' % (deleted_path,))

    def branch_file(self, cvs_symbol):
        """Register the branching of CVS_SYMBOL and emit its content command.

        The content command is always emitted with post_commit=False."""
        super(GitRevisionWriter, self).branch_file(cvs_symbol)
        self._modify_file(cvs_symbol, post_commit=False)

    def finish(self):
        """Finish the superclass and drop the reference to the output file.

        The file itself is not closed here."""
        super(GitRevisionWriter, self).finish()
        del self.f
class GitRevisionMarkWriter(GitRevisionWriter):
    """Writer that refers to file contents through a numeric mark."""

    def _modify_file(self, cvs_item, post_commit):
        """Write an 'M' command that points at the mark for CVS_ITEM.

        The mark number is read from CVS_ITEM.revision_reader_token and
        the file mode depends on whether the CVS file is executable.
        POST_COMMIT is not used."""
        cvs_file = cvs_item.cvs_file

        # Regular file by default; executable files get the x bits.
        mode = '100644'
        if cvs_file.executable:
            mode = '100755'

        self.f.write(
            'M %s :%d %s\n'
            % (mode, cvs_item.revision_reader_token, cvs_file.cvs_path,)
            )
class GitRevisionInlineWriter(GitRevisionWriter):
    """Writer that embeds each file's full text inline in the dump file.

    The text is obtained from the revision reader passed to the
    constructor.
    """

    def __init__(self, revision_reader):
        """Remember REVISION_READER, the object that supplies file contents."""
        self.revision_reader = revision_reader

    def register_artifacts(self, which_pass):
        """Register the artifacts of this writer and of its revision reader."""
        GitRevisionWriter.register_artifacts(self, which_pass)
        self.revision_reader.register_artifacts(which_pass)

    def start(self, mirror, f):
        """Start the writer on MIRROR and output file F, then start the reader."""
        GitRevisionWriter.start(self, mirror, f)
        self.revision_reader.start()

    def _modify_file(self, cvs_item, post_commit):
        """Write an inline 'M' command followed by the content of CVS_ITEM.

        If CVS_ITEM is a CVSSymbol, the content is that of the CVS
        revision the symbol was sourced from.  POST_COMMIT is not used."""
        if cvs_item.cvs_file.executable:
            mode = '100755'
        else:
            mode = '100644'

        self.f.write(
            'M %s inline %s\n'
            % (mode, cvs_item.cvs_file.cvs_path,)
            )

        if isinstance(cvs_item, CVSSymbol):
            cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)
        else:
            cvs_rev = cvs_item

        # FIXME: We have to decide what to do about keyword substitution
        # and eol_style here:
        stream = self.revision_reader.get_content_stream(
            cvs_rev, suppress_keyword_substitution=False
            )
        # Close the stream even if reading fails, so that it is not leaked.
        try:
            fulltext = stream.read()
        finally:
            stream.close()

        self.f.write('data %d\n' % (len(fulltext),))
        self.f.write(fulltext)
        self.f.write('\n')

    def finish(self):
        """Finish the writer, then finish the revision reader."""
        GitRevisionWriter.finish(self)
        self.revision_reader.finish()
class GitOutputOption(DVCSOutputOption):
    """An OutputOption that outputs to a git-fast-import formatted file.

    Members:

      dump_filename -- (string) the name of the file to which the
          git-fast-import commands for defining revisions will be
          written.

      author_transforms -- a map from CVS author names to git full name
          and email address.  See
          DVCSOutputOption.normalize_author_transforms() for information
          about the form of this parameter.

    """

    name = "Git"

    # The first mark number used for git-fast-import commit marks.  This
    # value needs to be large to avoid conflicts with blob marks.
    _first_commit_mark = 1000000000

    def __init__(
            self, dump_filename, revision_writer,
            author_transforms=None,
            tie_tag_fixup_branches=False,
            ):
        """Constructor.

        DUMP_FILENAME is the name of the file to which the git-fast-import
        commands for defining revisions should be written.  (Please note
        that depending on the style of revision writer, the actual file
        contents might not be written to this file.)

        REVISION_WRITER is a GitRevisionWriter that is used to output
        either the content of revisions or a mark that was previously used
        to label a blob.

        AUTHOR_TRANSFORMS is a map from CVS author names to git full name
        and email address; see
        DVCSOutputOption.normalize_author_transforms() for the accepted
        form of this parameter.  All of the contents should either be
        Unicode strings or 8-bit strings encoded as UTF-8.

        TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
        fixup branch, it should be pseudo-merged (ancestry linked but no
        content changes) back into its source branch, to dispose of the
        open head.

        """
        DVCSOutputOption.__init__(self)
        self.dump_filename = dump_filename
        self.revision_writer = revision_writer

        self.author_transforms = self.normalize_author_transforms(
            author_transforms
            )

        self.tie_tag_fixup_branches = tie_tag_fixup_branches

        self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

    def register_artifacts(self, which_pass):
        """Register the artifacts of this option and of its revision writer."""
        DVCSOutputOption.register_artifacts(self, which_pass)
        self.revision_writer.register_artifacts(which_pass)

    def check_symbols(self, symbol_map):
        """Check SYMBOL_MAP; no checks are currently performed."""
        # FIXME: What constraints does git impose on symbols?
        pass

    def setup(self, svn_rev_count):
        """Open the dump file and prepare the bookkeeping for commit marks."""
        DVCSOutputOption.setup(self, svn_rev_count)
        self.f = open(self.dump_filename, 'wb')

        # The youngest revnum that has been committed so far:
        self._youngest = 0

        # A map {lod : [(revnum, mark)]} giving each of the revision
        # numbers in which there was a commit to lod, and the mark active
        # at the end of the revnum.
        self._marks = {}

        self.revision_writer.start(self._mirror, self.f)

    def _create_commit_mark(self, lod, revnum):
        """Generate a new mark, record it for LOD at REVNUM, and return it."""
        mark = self._mark_generator.gen_id()
        self._set_lod_mark(lod, revnum, mark)
        return mark

    def _set_lod_mark(self, lod, revnum, mark):
        """Record MARK as the status of LOD for REVNUM.

        If there is already an entry for REVNUM, overwrite it.  If not,
        append a new entry to the self._marks list for LOD."""

        assert revnum >= self._youngest
        entry = (revnum, mark)
        modifications = self._marks.setdefault(lod, [])
        if modifications and modifications[-1][0] == revnum:
            modifications[-1] = entry
        else:
            modifications.append(entry)
        self._youngest = revnum

    def _get_author(self, svn_commit):
        """Return the author to be used for SVN_COMMIT.

        Return the author as a UTF-8 string in the form needed by git
        fast-import; that is, 'name <email>'."""

        cvs_author = svn_commit.get_author()
        return self._map_author(cvs_author)

    def _map_author(self, cvs_author):
        """Return the git author string for CVS_AUTHOR.

        Authors that have no entry in self.author_transforms are mapped
        to 'CVS_AUTHOR <>'."""
        return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

    @staticmethod
    def _get_log_msg(svn_commit):
        """Return the log message of SVN_COMMIT."""
        return svn_commit.get_log_msg()

    def process_initial_project_commit(self, svn_commit):
        """Record the commit in the mirror; nothing is written to the dump."""
        self._mirror.start_commit(svn_commit.revnum)
        self._mirror.end_commit()

    def process_primary_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on the branch of its single LOD.

        Raise InternalError unless the commit affects exactly one LOD."""
        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        lods = set()
        for cvs_rev in svn_commit.get_cvs_items():
            lods.add(cvs_rev.lod)
        if len(lods) != 1:
            raise InternalError('Commit affects %d LODs' % (len(lods),))
        lod = lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        if isinstance(lod, Trunk):
            # FIXME: is this correct?:
            self.f.write('commit refs/heads/master\n')
        else:
            self.f.write('commit refs/heads/%s\n' % (lod.name,))
        self.f.write(
            'mark :%d\n'
            % (self._create_commit_mark(lod, svn_commit.revnum),)
            )
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))
        for cvs_rev in svn_commit.get_cvs_items():
            self.revision_writer.process_revision(cvs_rev, post_commit=False)

        self.f.write('\n')
        self._mirror.end_commit()

    def process_post_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on master merged from its source LOD.

        The commit's mark is recorded under the LOD key None.  Raise
        InternalError unless all revisions come from exactly one LOD."""
        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        source_lods = set()
        for cvs_rev in svn_commit.cvs_revs:
            source_lods.add(cvs_rev.lod)
        if len(source_lods) != 1:
            raise InternalError(
                'Commit is from %d LODs' % (len(source_lods),)
                )
        source_lod = source_lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        # FIXME: is this correct?:
        self.f.write('commit refs/heads/master\n')
        self.f.write(
            'mark :%d\n'
            % (self._create_commit_mark(None, svn_commit.revnum),)
            )
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))
        self.f.write(
            'merge :%d\n'
            % (self._get_source_mark(source_lod, svn_commit.revnum),)
            )
        for cvs_rev in svn_commit.cvs_revs:
            self.revision_writer.process_revision(cvs_rev, post_commit=True)

        self.f.write('\n')
        self._mirror.end_commit()

    def _get_source_mark(self, source_lod, revnum):
        """Return the mark active on SOURCE_LOD at the end of REVNUM.

        Raise KeyError if no mark was ever recorded for SOURCE_LOD, and
        InternalError if its first recorded mark is younger than REVNUM."""

        modifications = self._marks[source_lod]
        # (revnum + 1,) sorts before every (revnum + 1, mark) entry, so
        # the element just before the insertion point is the last entry
        # whose revision number is <= REVNUM.
        i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
        if i < 0:
            # Without this check the index -1 would silently select the
            # youngest mark, which is not the one active at REVNUM.
            raise InternalError(
                'No mark recorded for %r at or before r%d'
                % (source_lod, revnum,)
                )
        return modifications[i][1]

    def describe_lod_to_user(self, lod):
        """This needs to make sense to users of the fastimported result."""
        if isinstance(lod, Trunk):
            return 'master'
        else:
            return lod.name

    def _describe_commit(self, svn_commit, lod):
        """Return a one-line description of SVN_COMMIT on LOD.

        The description holds the user-visible LOD name, the commit date
        in UTC, the mapped author (without an empty ' <>' email suffix)
        and the first line of the log message."""
        author = self._map_author(svn_commit.get_author())
        if author.endswith(" <>"):
            author = author[:-3]
        date = time.strftime(
            "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
            )
        # Only the first line of the log message is shown:
        log_msg = svn_commit.get_log_msg().split('\n', 1)[0]
        return "%s %s %s '%s'" % (
            self.describe_lod_to_user(lod), date, author, log_msg,)

    def _process_symbol_commit(
            self, svn_commit, git_branch, source_groups, mark
            ):
        """Write one fixup commit that creates a symbol on GIT_BRANCH.

        SOURCE_GROUPS is a sequence of (source_lod, source_revnum,
        cvs_symbols) tuples.  The first group is the primary parent: it
        is the only one recorded as an ancestor of the commit, and every
        file found in it that does not belong to the symbol is deleted.
        The other groups are only listed as cherrypicks in the log
        message.  MARK is the mark given to the new commit.

        Raise InternalError if the primary source directory does not
        exist in the mirror."""
        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        # Get the primary parent
        (p_source_lod, p_source_revnum, _) = source_groups[0]
        try:
            p_source_node = self._mirror.get_old_lod_directory(
                p_source_lod, p_source_revnum
                )
        except KeyError:
            raise InternalError('Source %r does not exist' % (p_source_lod,))
        cvs_files_to_delete = set(self._get_all_files(p_source_node))

        for (_, _, cvs_symbols) in source_groups:
            for cvs_symbol in cvs_symbols:
                cvs_files_to_delete.discard(cvs_symbol.cvs_file)

        # Sort once, so that the log trailer and the 'D' commands list
        # the files in the same, reproducible order.
        doomed_files = sorted(cvs_files_to_delete)

        # Write a trailer to the log message which describes the
        # cherrypicks that make up this symbol creation.
        parts = [log_msg, "\n"]
        parts.append(
            "\nSprout from %s" % (
                self._describe_commit(
                    Ctx()._persistence_manager.get_svn_commit(p_source_revnum),
                    p_source_lod),)
            )
        for (source_lod, source_revnum, cvs_symbols) in source_groups[1:]:
            parts.append(
                "\nCherrypick from %s:" % (
                    self._describe_commit(
                        Ctx()._persistence_manager.get_svn_commit(
                            source_revnum),
                        source_lod),)
                )
            for cvs_symbol in cvs_symbols:
                parts.append("\n %s" % (cvs_symbol.cvs_file.cvs_path,))
        if doomed_files:
            parts.append("\nDelete:")
            for cvs_file in doomed_files:
                parts.append("\n %s" % (cvs_file.cvs_path,))
        log_msg = ''.join(parts)

        self.f.write('commit %s\n' % (git_branch,))
        self.f.write('mark :%d\n' % (mark,))
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

        # Only record actual DVCS ancestry for the primary sprout parent,
        # all the rest are effectively cherrypicks.
        self.f.write(
            'merge :%d\n'
            % (self._get_source_mark(p_source_lod, p_source_revnum),)
            )

        for (_, _, cvs_symbols) in source_groups:
            for cvs_symbol in cvs_symbols:
                self.revision_writer.branch_file(cvs_symbol)

        for cvs_file in doomed_files:
            self.f.write('D %s\n' % (cvs_file.cvs_path,))

        self.f.write('\n')

    def process_branch_commit(self, svn_commit):
        """Create the branch of SVN_COMMIT.

        If the branch is a simple copy of one source, point its ref at
        the mark of the source; otherwise write a fixup commit."""
        self._mirror.start_commit(svn_commit.revnum)

        source_groups = list(self._get_source_groups(svn_commit))
        if self._is_simple_copy(svn_commit, source_groups):
            (source_lod, source_revnum, cvs_symbols) = source_groups[0]
            Log().debug(
                '%s will be created via a simple copy from %s:r%d'
                % (svn_commit.symbol, source_lod, source_revnum,)
                )
            mark = self._get_source_mark(source_lod, source_revnum)
            self._set_symbol(svn_commit.symbol, mark)
            self._mirror.copy_lod(
                source_lod, svn_commit.symbol, source_revnum
                )
            self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
        else:
            Log().debug(
                '%s will be created via fixup commit(s)'
                % (svn_commit.symbol,)
                )
            self._process_symbol_commit(
                svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
                source_groups,
                self._create_commit_mark(svn_commit.symbol, svn_commit.revnum),
                )

        self._mirror.end_commit()

    def _set_symbol(self, symbol, mark):
        """Write a 'reset' command that points the ref of SYMBOL at MARK.

        Branches go to refs/heads and tags to refs/tags.  Raise
        InternalError if SYMBOL is neither a Branch nor a Tag."""
        if isinstance(symbol, Branch):
            category = 'heads'
        elif isinstance(symbol, Tag):
            category = 'tags'
        else:
            raise InternalError(
                'Symbol %r is neither a Branch nor a Tag' % (symbol,)
                )
        self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
        self.f.write('from :%d\n' % (mark,))

    def get_tag_fixup_branch_name(self, svn_commit):
        """Return the name of the ref used for tag fixup branches."""
        # The branch name to use for the "tag fixup branches".  The
        # git-fast-import documentation suggests using 'TAG_FIXUP'
        # (outside of the refs/heads namespace), but this is currently
        # broken.
        # Use a name containing '.', which is not allowed in CVS symbols,
        # to avoid conflicts (though of course a conflict could still
        # result if the user requests symbol transformations).
        return 'refs/heads/TAG.FIXUP'

    def process_tag_commit(self, svn_commit):
        """Create the tag of SVN_COMMIT.

        If the tag is a simple copy of one source, point its ref at the
        mark of the source.  Otherwise write a fixup commit on the tag
        fixup branch, point the tag at that commit and, if
        self.tie_tag_fixup_branches is set, write an extra commit on the
        source branch that merges the fixup commit."""
        # NOTE(review): an earlier FIXME here said that the fixup branch
        # has the same name as the tag and is never deleted.  The code
        # below takes the name from get_tag_fixup_branch_name() and
        # writes a 'reset' for that ref afterwards, so the remark looks
        # stale -- confirm.
        self._mirror.start_commit(svn_commit.revnum)

        source_groups = list(self._get_source_groups(svn_commit))
        if self._is_simple_copy(svn_commit, source_groups):
            (source_lod, source_revnum, cvs_symbols) = source_groups[0]
            Log().debug(
                '%s will be created via a simple copy from %s:r%d'
                % (svn_commit.symbol, source_lod, source_revnum,)
                )
            mark = self._get_source_mark(source_lod, source_revnum)
            self._set_symbol(svn_commit.symbol, mark)
            self._mirror.copy_lod(
                source_lod, svn_commit.symbol, source_revnum
                )
            self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
        else:
            Log().debug(
                '%s will be created via a fixup branch' % (svn_commit.symbol,)
                )

            fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

            # Create the fixup branch (which might involve making more
            # than one commit):
            mark = self._create_commit_mark(
                svn_commit.symbol, svn_commit.revnum
                )
            self._process_symbol_commit(
                svn_commit, fixup_branch_name, source_groups, mark
                )

            # Store the mark of the last commit to the fixup branch as
            # the value of the tag:
            self._set_symbol(svn_commit.symbol, mark)
            self.f.write('reset %s\n' % (fixup_branch_name,))
            self.f.write('\n')

            if self.tie_tag_fixup_branches:
                source_lod = source_groups[0][0]
                source_lod_git_branch = 'refs/heads/%s' % (
                    getattr(source_lod, 'name', 'master'),)

                mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
                author = self._map_author(Ctx().username)
                log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

                self.f.write('commit %s\n' % (source_lod_git_branch,))
                self.f.write('mark :%d\n' % (mark2,))
                self.f.write(
                    'committer %s %d +0000\n' % (author, svn_commit.date,)
                    )
                self.f.write('data %d\n' % (len(log_msg),))
                self.f.write('%s\n' % (log_msg,))

                self.f.write(
                    'merge :%d\n'
                    % (mark,)
                    )

                self.f.write('\n')

        self._mirror.end_commit()

    def _get_log_msg_for_ancestry_tie(self, svn_commit):
        """Return the wrapped log message for an ancestry-tie commit.

        The message is Ctx().tie_tag_ancestry_message with the name of
        SVN_COMMIT's symbol filled in as 'symbol_name'."""
        return Ctx().text_wrapper.fill(
            Ctx().tie_tag_ancestry_message % {
                'symbol_name' : svn_commit.symbol.name,
                }
            )

    def cleanup(self):
        """Finish the revision writer and close the dump file."""
        DVCSOutputOption.cleanup(self)
        # Close the dump file even if the revision writer fails to
        # finish, so that the file handle is not leaked.
        try:
            self.revision_writer.finish()
        finally:
            self.f.close()
            del self.f