Extract a method, DumpfileDelegate._string_for_props().
[cvs2svn.git] / cvs2svn_lib / git_output_option.py
blob42a2599317291b4a04646dba4ac729e7d18f5e1f
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
23 """
25 import bisect
26 import time
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.common import FatalError
30 from cvs2svn_lib.log import Log
31 from cvs2svn_lib.context import Ctx
32 from cvs2svn_lib.symbol import Trunk
33 from cvs2svn_lib.symbol import Branch
34 from cvs2svn_lib.symbol import Tag
35 from cvs2svn_lib.cvs_item import CVSSymbol
36 from cvs2svn_lib.dvcs_common import DVCSOutputOption
37 from cvs2svn_lib.dvcs_common import MirrorUpdater
38 from cvs2svn_lib.key_generator import KeyGenerator
class ExpectedDirectoryError(Exception):
    """A file was found where a directory was expected."""
class ExpectedFileError(Exception):
    """A directory was found where a file was expected."""
class GitRevisionWriter(MirrorUpdater):
    """Base class for writing file changes as git-fast-import commands.

    Every operation is first forwarded to MirrorUpdater and then
    written to the file object that was handed to start().  Subclasses
    must implement _modify_file(), which decides how the content of a
    file is referenced in the output.
    """

    def start(self, mirror, f):
        """Start updating MIRROR and remember F as the output file."""
        super(GitRevisionWriter, self).start(mirror)
        self.f = f

    def _modify_file(self, cvs_item, post_commit):
        """Write the command that sets the content of CVS_ITEM's file.

        Must be overridden by subclasses."""
        raise NotImplementedError()

    def add_file(self, cvs_rev, post_commit):
        """Record the addition of CVS_REV and write its content command."""
        super(GitRevisionWriter, self).add_file(cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def modify_file(self, cvs_rev, post_commit):
        """Record the change of CVS_REV and write its content command."""
        super(GitRevisionWriter, self).modify_file(cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def delete_file(self, cvs_rev, post_commit):
        """Record the deletion of CVS_REV and write a 'D' command."""
        super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit)
        self.f.write('D %s\n' % (cvs_rev.cvs_file.cvs_path,))

    def branch_file(self, cvs_symbol):
        """Record the copy of CVS_SYMBOL and write its content command."""
        super(GitRevisionWriter, self).branch_file(cvs_symbol)
        self._modify_file(cvs_symbol, post_commit=False)

    def finish(self):
        """Finish updating the mirror and forget the output file.

        The file itself is not closed here."""
        super(GitRevisionWriter, self).finish()
        del self.f
class GitRevisionMarkWriter(GitRevisionWriter):
    """Write file modifications as references to numbered marks.

    The mark number is taken from the item's revision_reader_token.
    """

    def _modify_file(self, cvs_item, post_commit):
        """Write an 'M <mode> :<mark> <path>' command for CVS_ITEM."""
        if cvs_item.cvs_file.executable:
            mode = '100755'
        else:
            mode = '100644'

        self.f.write(
            'M %s :%d %s\n'
            % (mode, cvs_item.revision_reader_token,
               cvs_item.cvs_file.cvs_path,)
            )
class GitRevisionInlineWriter(GitRevisionWriter):
    """Write file modifications with the file content inline.

    The content of each revision is fetched from REVISION_READER at the
    time the modification is written.
    """

    def __init__(self, revision_reader):
        self.revision_reader = revision_reader

    def register_artifacts(self, which_pass):
        """Register artifacts for this writer and its revision reader."""
        GitRevisionWriter.register_artifacts(self, which_pass)
        self.revision_reader.register_artifacts(which_pass)

    def start(self, mirror, f):
        """Start the writer and the revision reader."""
        GitRevisionWriter.start(self, mirror, f)
        self.revision_reader.start()

    def _modify_file(self, cvs_item, post_commit):
        """Write an 'M <mode> inline <path>' command plus a data block."""
        if cvs_item.cvs_file.executable:
            mode = '100755'
        else:
            mode = '100644'

        self.f.write(
            'M %s inline %s\n'
            % (mode, cvs_item.cvs_file.cvs_path,)
            )

        # A symbol has no content of its own; use the revision that it
        # was copied from.
        if isinstance(cvs_item, CVSSymbol):
            cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)
        else:
            cvs_rev = cvs_item

        # FIXME: We have to decide what to do about keyword substitution
        # and eol_style here:
        fulltext = self.revision_reader.get_content(
            cvs_rev, suppress_keyword_substitution=False
            )

        self.f.write('data %d\n' % (len(fulltext),))
        self.f.write(fulltext)
        self.f.write('\n')

    def finish(self):
        """Finish the writer and then the revision reader."""
        GitRevisionWriter.finish(self)
        self.revision_reader.finish()
class GitOutputOption(DVCSOutputOption):
    """An OutputOption that outputs to a git-fast-import formatted file.

    Members:

      dump_filename -- (string) the name of the file to which the
          git-fast-import commands for defining revisions will be
          written.

      author_transforms -- a map from CVS author names to git full name
          and email address.  See
          DVCSOutputOption.normalize_author_transforms() for information
          about the form of this parameter.

    """

    name = "Git"

    # The first mark number used for git-fast-import commit marks.  This
    # value needs to be large to avoid conflicts with blob marks.
    _first_commit_mark = 1000000000

    def __init__(
            self, dump_filename, revision_writer,
            author_transforms=None,
            tie_tag_fixup_branches=False,
            ):
        """Constructor.

        DUMP_FILENAME is the name of the file to which the
        git-fast-import commands for defining revisions should be
        written.  (Please note that depending on the style of revision
        writer, the actual file contents might not be written to this
        file.)

        REVISION_WRITER is a GitRevisionWriter that is used to output
        either the content of revisions or a mark that was previously
        used to label a blob.

        AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
        CVS author names to git full name and email address.  All of the
        contents should either be Unicode strings or 8-bit strings
        encoded as UTF-8.

        TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
        fixup branch, it should be pseudo-merged (ancestry linked but no
        content changes) back into its source branch, to dispose of the
        open head.

        """

        DVCSOutputOption.__init__(self)
        self.dump_filename = dump_filename
        self.revision_writer = revision_writer

        self.author_transforms = self.normalize_author_transforms(
            author_transforms
            )

        self.tie_tag_fixup_branches = tie_tag_fixup_branches

        self._mark_generator = KeyGenerator(
            GitOutputOption._first_commit_mark
            )

    def register_artifacts(self, which_pass):
        """Register artifacts for this option and its revision writer."""
        DVCSOutputOption.register_artifacts(self, which_pass)
        self.revision_writer.register_artifacts(which_pass)

    def check_symbols(self, symbol_map):
        # FIXME: What constraints does git impose on symbols?
        pass

    def setup(self, svn_rev_count):
        """Open the dump file and start the revision writer."""
        DVCSOutputOption.setup(self, svn_rev_count)
        self.f = open(self.dump_filename, 'wb')

        # The youngest revnum that has been committed so far:
        self._youngest = 0

        # A map {lod : [(revnum, mark)]} giving each of the revision
        # numbers in which there was a commit to lod, and the mark active
        # at the end of the revnum.
        self._marks = {}

        self.revision_writer.start(self._mirror, self.f)

    def _create_commit_mark(self, lod, revnum):
        """Generate a new mark, record it for LOD at REVNUM, return it."""
        mark = self._mark_generator.gen_id()
        self._set_lod_mark(lod, revnum, mark)
        return mark

    def _set_lod_mark(self, lod, revnum, mark):
        """Record MARK as the status of LOD for REVNUM.

        If there is already an entry for REVNUM, overwrite it.  If not,
        append a new entry to the self._marks list for LOD."""

        assert revnum >= self._youngest
        entry = (revnum, mark)
        try:
            modifications = self._marks[lod]
        except KeyError:
            # This LOD hasn't appeared before; create a new list and add
            # the entry:
            self._marks[lod] = [entry]
        else:
            # A record exists, so it necessarily has at least one element:
            if modifications[-1][0] == revnum:
                modifications[-1] = entry
            else:
                modifications.append(entry)
        self._youngest = revnum

    def _get_author(self, svn_commit):
        """Return the author to be used for SVN_COMMIT.

        Return the author as a UTF-8 string in the form needed by git
        fast-import; that is, 'name <email>'."""

        cvs_author = svn_commit.get_author()
        return self._map_author(cvs_author)

    def _map_author(self, cvs_author):
        """Return the git author string for CVS_AUTHOR.

        Authors without an entry in self.author_transforms are rendered
        as 'CVS_AUTHOR <>'."""
        return self.author_transforms.get(
            cvs_author, "%s <>" % (cvs_author,)
            )

    @staticmethod
    def _get_log_msg(svn_commit):
        """Return the log message to be used for SVN_COMMIT."""
        return svn_commit.get_log_msg()

    def process_initial_project_commit(self, svn_commit):
        """Record SVN_COMMIT in the mirror; nothing is written."""
        self._mirror.start_commit(svn_commit.revnum)
        self._mirror.end_commit()

    def process_primary_commit(self, svn_commit):
        """Write SVN_COMMIT as one commit on the branch of its LOD.

        Raise InternalError unless the CVS items of SVN_COMMIT all
        belong to exactly one LOD."""
        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        lods = set(cvs_rev.lod for cvs_rev in svn_commit.get_cvs_items())
        if len(lods) != 1:
            raise InternalError('Commit affects %d LODs' % (len(lods),))
        lod = lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        if isinstance(lod, Trunk):
            # FIXME: is this correct?:
            self.f.write('commit refs/heads/master\n')
        else:
            self.f.write('commit refs/heads/%s\n' % (lod.name,))
        self.f.write(
            'mark :%d\n'
            % (self._create_commit_mark(lod, svn_commit.revnum),)
            )
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))
        for cvs_rev in svn_commit.get_cvs_items():
            self.revision_writer.process_revision(
                cvs_rev, post_commit=False
                )

        self.f.write('\n')
        self._mirror.end_commit()

    def process_post_commit(self, svn_commit):
        """Write SVN_COMMIT to refs/heads/master as a merge commit.

        The merge parent is the mark that was active on the source LOD
        of the commit's CVS revisions.  Raise InternalError unless those
        revisions all come from exactly one LOD."""
        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        source_lods = set(cvs_rev.lod for cvs_rev in svn_commit.cvs_revs)
        if len(source_lods) != 1:
            raise InternalError(
                'Commit is from %d LODs' % (len(source_lods),)
                )
        source_lod = source_lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        # FIXME: is this correct?:
        self.f.write('commit refs/heads/master\n')
        self.f.write(
            'mark :%d\n'
            % (self._create_commit_mark(None, svn_commit.revnum),)
            )
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))
        self.f.write(
            'merge :%d\n'
            % (self._get_source_mark(source_lod, svn_commit.revnum),)
            )
        for cvs_rev in svn_commit.cvs_revs:
            self.revision_writer.process_revision(
                cvs_rev, post_commit=True
                )

        self.f.write('\n')
        self._mirror.end_commit()

    def _get_source_mark(self, source_lod, revnum):
        """Return the mark active on SOURCE_LOD at the end of REVNUM.

        Raise KeyError if no mark was ever recorded for SOURCE_LOD, and
        InternalError if none was recorded at or before REVNUM."""

        modifications = self._marks[source_lod]
        # (revnum + 1,) sorts before every (revnum + 1, mark) entry, so
        # the entry just before the insertion point is the last one
        # whose revision number is <= REVNUM.
        i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
        if i < 0:
            # Without this check the index -1 would silently select the
            # youngest entry, which belongs to a later revision.
            raise InternalError(
                'No mark recorded for %r at or before r%d'
                % (source_lod, revnum,)
                )
        return modifications[i][1]

    def describe_lod_to_user(self, lod):
        """This needs to make sense to users of the fastimported result."""
        if isinstance(lod, Trunk):
            return 'master'
        else:
            return lod.name

    def _describe_commit(self, svn_commit, lod):
        """Return a one-line description of SVN_COMMIT on LOD.

        The description consists of the user-visible LOD name, the
        commit date in UTC, the author (without an empty ' <>' email
        part), and the first line of the log message in single
        quotes."""
        author = self._map_author(svn_commit.get_author())
        if author.endswith(" <>"):
            author = author[:-3]
        date = time.strftime(
            "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
            )
        first_line = svn_commit.get_log_msg().split('\n', 1)[0]
        return "%s %s %s '%s'" % (
            self.describe_lod_to_user(lod), date, author, first_line,)

    def _process_symbol_commit(
            self, svn_commit, git_branch, source_groups, mark
            ):
        """Write a commit to GIT_BRANCH that creates a symbol.

        SOURCE_GROUPS is a sequence of (source_lod, source_revnum,
        cvs_symbols) tuples.  The first group is the primary parent and
        is the only one recorded as git ancestry.  Files that exist in
        the primary parent but are not part of the symbol are deleted.
        MARK is the mark to assign to the new commit.

        Raise InternalError if the primary source does not exist in the
        mirror."""
        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        # Get the primary parent
        p_source_lod, p_source_revnum, p_cvs_symbols = source_groups[0]
        try:
            p_source_node = self._mirror.get_old_lod_directory(
                p_source_lod, p_source_revnum
                )
        except KeyError:
            raise InternalError(
                'Source %r does not exist' % (p_source_lod,)
                )
        cvs_files_to_delete = set(self._get_all_files(p_source_node))

        for (source_lod, source_revnum, cvs_symbols,) in source_groups:
            for cvs_symbol in cvs_symbols:
                cvs_files_to_delete.discard(cvs_symbol.cvs_file)

        # Sort once so that the log trailer and the 'D' commands below
        # list the files in the same, reproducible order.
        deleted_files = sorted(cvs_files_to_delete)

        # Write a trailer to the log message which describes the
        # cherrypicks that make up this symbol creation.
        #
        # NOTE(review): the per-file lines use the single-space indent
        # seen in the text under review; confirm the intended width
        # against the upstream file.
        lines = [log_msg, ""]
        lines.append(
            "Sprout from %s" % (
                self._describe_commit(
                    Ctx()._persistence_manager.get_svn_commit(
                        p_source_revnum
                        ),
                    p_source_lod
                    ),
                )
            )
        for (source_lod, source_revnum, cvs_symbols,) in source_groups[1:]:
            lines.append(
                "Cherrypick from %s:" % (
                    self._describe_commit(
                        Ctx()._persistence_manager.get_svn_commit(
                            source_revnum
                            ),
                        source_lod
                        ),
                    )
                )
            for cvs_symbol in cvs_symbols:
                lines.append(" %s" % (cvs_symbol.cvs_file.cvs_path,))
        if deleted_files:
            lines.append("Delete:")
            for cvs_file in deleted_files:
                lines.append(" %s" % (cvs_file.cvs_path,))
        log_msg = "\n".join(lines)

        self.f.write('commit %s\n' % (git_branch,))
        self.f.write('mark :%d\n' % (mark,))
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

        # Only record actual DVCS ancestry for the primary sprout parent,
        # all the rest are effectively cherrypicks.
        self.f.write(
            'merge :%d\n'
            % (self._get_source_mark(p_source_lod, p_source_revnum),)
            )

        for (source_lod, source_revnum, cvs_symbols,) in source_groups:
            for cvs_symbol in cvs_symbols:
                self.revision_writer.branch_file(cvs_symbol)

        for cvs_file in deleted_files:
            self.f.write('D %s\n' % (cvs_file.cvs_path,))

        self.f.write('\n')

    def _create_symbol_by_copy(self, svn_commit, source_groups):
        """Create the symbol of SVN_COMMIT as a copy of its only source.

        Point the symbol's ref at the mark of the first group in
        SOURCE_GROUPS, copy the LOD in the mirror, and record the mark
        for the symbol.  No commit is written."""
        (source_lod, source_revnum, cvs_symbols) = source_groups[0]
        Log().debug(
            '%s will be created via a simple copy from %s:r%d'
            % (svn_commit.symbol, source_lod, source_revnum,)
            )
        mark = self._get_source_mark(source_lod, source_revnum)
        self._set_symbol(svn_commit.symbol, mark)
        self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
        self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)

    def process_branch_commit(self, svn_commit):
        """Create the branch of SVN_COMMIT by copy or by fixup commit."""
        self._mirror.start_commit(svn_commit.revnum)

        source_groups = list(self._get_source_groups(svn_commit))
        if self._is_simple_copy(svn_commit, source_groups):
            self._create_symbol_by_copy(svn_commit, source_groups)
        else:
            Log().debug(
                '%s will be created via fixup commit(s)'
                % (svn_commit.symbol,)
                )
            self._process_symbol_commit(
                svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
                source_groups,
                self._create_commit_mark(
                    svn_commit.symbol, svn_commit.revnum
                    ),
                )

        self._mirror.end_commit()

    def _set_symbol(self, symbol, mark):
        """Write a 'reset' command that points SYMBOL's ref at MARK.

        Raise InternalError if SYMBOL is neither a Branch nor a Tag."""
        if isinstance(symbol, Branch):
            category = 'heads'
        elif isinstance(symbol, Tag):
            category = 'tags'
        else:
            raise InternalError()
        self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
        self.f.write('from :%d\n' % (mark,))

    def get_tag_fixup_branch_name(self, svn_commit):
        # The branch name to use for the "tag fixup branches".  The
        # git-fast-import documentation suggests using 'TAG_FIXUP'
        # (outside of the refs/heads namespace), but this is currently
        # broken.  Use a name containing '.', which is not allowed in CVS
        # symbols, to avoid conflicts (though of course a conflict could
        # still result if the user requests symbol transformations).
        return 'refs/heads/TAG.FIXUP'

    def process_tag_commit(self, svn_commit):
        """Create the tag of SVN_COMMIT by copy or via a fixup branch."""
        # FIXME: For now we create a fixup branch with the same name as
        # the tag, then the tag.  We never delete the fixup branch.
        self._mirror.start_commit(svn_commit.revnum)

        source_groups = list(self._get_source_groups(svn_commit))
        if self._is_simple_copy(svn_commit, source_groups):
            self._create_symbol_by_copy(svn_commit, source_groups)
        else:
            Log().debug(
                '%s will be created via a fixup branch'
                % (svn_commit.symbol,)
                )

            fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

            # Create the fixup branch (which might involve making more
            # than one commit):
            mark = self._create_commit_mark(
                svn_commit.symbol, svn_commit.revnum
                )
            self._process_symbol_commit(
                svn_commit, fixup_branch_name, source_groups, mark
                )

            # Store the mark of the last commit to the fixup branch as
            # the value of the tag:
            self._set_symbol(svn_commit.symbol, mark)
            self.f.write('reset %s\n' % (fixup_branch_name,))
            self.f.write('\n')

            if self.tie_tag_fixup_branches:
                source_lod = source_groups[0][0]
                # A source without a 'name' attribute is written to
                # 'master'.
                source_lod_git_branch = (
                    'refs/heads/%s'
                    % (getattr(source_lod, 'name', 'master'),)
                    )

                mark2 = self._create_commit_mark(
                    source_lod, svn_commit.revnum
                    )
                author = self._map_author(Ctx().username)
                log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

                self.f.write('commit %s\n' % (source_lod_git_branch,))
                self.f.write('mark :%d\n' % (mark2,))
                self.f.write(
                    'committer %s %d +0000\n'
                    % (author, svn_commit.date,)
                    )
                self.f.write('data %d\n' % (len(log_msg),))
                self.f.write('%s\n' % (log_msg,))

                self.f.write(
                    'merge :%d\n'
                    % (mark,)
                    )

                self.f.write('\n')

        self._mirror.end_commit()

    def _get_log_msg_for_ancestry_tie(self, svn_commit):
        """Return the wrapped log message for an ancestry-tie commit."""
        return Ctx().text_wrapper.fill(
            Ctx().tie_tag_ancestry_message % {
                'symbol_name' : svn_commit.symbol.name,
                }
            )

    def cleanup(self):
        """Finish the revision writer and close the dump file.

        The dump file is closed even if one of the earlier cleanup steps
        raises."""
        try:
            DVCSOutputOption.cleanup(self)
            self.revision_writer.finish()
        finally:
            self.f.close()
            del self.f