Remove member DumpstreamDelegate.dumpfile_path.
[cvs2svn.git] / cvs2svn_lib / git_output_option.py
blob4f55ee82fb207ff19f307866bd5dd536d44e2b5b
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
23 """
25 import bisect
26 import time
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.common import FatalError
30 from cvs2svn_lib.log import Log
31 from cvs2svn_lib.context import Ctx
32 from cvs2svn_lib.symbol import Trunk
33 from cvs2svn_lib.symbol import Branch
34 from cvs2svn_lib.symbol import Tag
35 from cvs2svn_lib.cvs_item import CVSSymbol
36 from cvs2svn_lib.dvcs_common import DVCSOutputOption
37 from cvs2svn_lib.dvcs_common import MirrorUpdater
38 from cvs2svn_lib.key_generator import KeyGenerator
class ExpectedDirectoryError(Exception):
  """Raised when a path that should be a directory is a file instead."""
class ExpectedFileError(Exception):
  """Raised when a path that should be a file is a directory instead."""
class GitRevisionWriter(MirrorUpdater):
  """Base class for writers of git-fast-import file commands.

  Every operation is first forwarded to the MirrorUpdater base class
  and then reflected in the output file F that was handed to start().
  Subclasses define, in _modify_file(), how the content of an added,
  modified or branched file is referenced in the output."""

  def start(self, mirror, f):
    """Start the base updater on MIRROR and remember F for output."""

    super(GitRevisionWriter, self).start(mirror)
    self.f = f

  def _modify_file(self, cvs_item, post_commit):
    """Write the command recording the new content of CVS_ITEM.

    Must be overridden by subclasses."""

    raise NotImplementedError()

  def add_file(self, cvs_rev, post_commit):
    """Record the addition of the file described by CVS_REV."""

    super(GitRevisionWriter, self).add_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def modify_file(self, cvs_rev, post_commit):
    """Record the modification of the file described by CVS_REV."""

    super(GitRevisionWriter, self).modify_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def delete_file(self, cvs_rev, post_commit):
    """Record the deletion of the file described by CVS_REV."""

    super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit)
    deleted_path = cvs_rev.cvs_file.cvs_path
    self.f.write('D %s\n' % (deleted_path,))

  def branch_file(self, cvs_symbol):
    """Record the file of CVS_SYMBOL as it appears on the symbol's LOD."""

    super(GitRevisionWriter, self).branch_file(cvs_symbol)
    self._modify_file(cvs_symbol, post_commit=False)

  def finish(self):
    """Finish the base updater and forget the output file."""

    super(GitRevisionWriter, self).finish()
    del self.f
class GitRevisionMarkWriter(GitRevisionWriter):
  """A GitRevisionWriter that refers to file contents by mark.

  The mark number written for an item is its revision_reader_token."""

  def _modify_file(self, cvs_item, post_commit):
    """Write an 'M' command pointing at the mark stored on CVS_ITEM."""

    # Regular file unless the CVS file is flagged as executable:
    mode = '100644'
    if cvs_item.cvs_file.executable:
      mode = '100755'

    blob_mark = cvs_item.revision_reader_token
    path = cvs_item.cvs_file.cvs_path
    self.f.write('M %s :%d %s\n' % (mode, blob_mark, path,))
class GitRevisionInlineWriter(GitRevisionWriter):
  """A GitRevisionWriter that writes file contents inline.

  The full text of each revision is obtained from REVISION_READER and
  written directly after the 'M ... inline' command."""

  def __init__(self, revision_reader):
    self.revision_reader = revision_reader

  def register_artifacts(self, which_pass):
    """Register the artifacts of this writer and of its revision reader."""

    GitRevisionWriter.register_artifacts(self, which_pass)
    self.revision_reader.register_artifacts(which_pass)

  def start(self, mirror, f):
    """Start this writer, then start the revision reader."""

    GitRevisionWriter.start(self, mirror, f)
    self.revision_reader.start()

  def _modify_file(self, cvs_item, post_commit):
    """Write an inline 'M' command followed by the content of CVS_ITEM."""

    # Regular file unless the CVS file is flagged as executable:
    mode = '100644'
    if cvs_item.cvs_file.executable:
      mode = '100755'

    path = cvs_item.cvs_file.cvs_path
    self.f.write('M %s inline %s\n' % (mode, path,))

    # A symbol has no text of its own; use the revision it was taken
    # from instead:
    cvs_rev = cvs_item
    if isinstance(cvs_item, CVSSymbol):
      cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)

    # FIXME: We have to decide what to do about keyword substitution
    # and eol_style here:
    fulltext = self.revision_reader.get_content(cvs_rev)

    size = len(fulltext)
    self.f.write('data %d\n' % (size,))
    self.f.write(fulltext)
    self.f.write('\n')

  def finish(self):
    """Finish this writer, then finish the revision reader."""

    GitRevisionWriter.finish(self)
    self.revision_reader.finish()
class GitOutputOption(DVCSOutputOption):
  """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string) the name of the file to which the
        git-fast-import commands for defining revisions will be
        written.

    revision_writer -- the GitRevisionWriter used to write the file
        changes that belong to each commit.

    author_transforms -- a map from CVS author names to git full name
        and email address.  See
        DVCSOutputOption.normalize_author_transforms() for information
        about the form of this parameter.

    tie_tag_fixup_branches -- (boolean) whether a tag fixup branch is
        pseudo-merged back into its source branch after the tag has
        been set.

  """

  name = "Git"

  # The first mark number used for git-fast-import commit marks.  This
  # value needs to be large to avoid conflicts with blob marks.
  _first_commit_mark = 1000000000

  def __init__(
        self, dump_filename, revision_writer,
        author_transforms=None,
        tie_tag_fixup_branches=False,
        ):
    """Constructor.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
    fixup branch, it should be pseudo-merged (ancestry linked but no
    content changes) back into its source branch, to dispose of the
    open head.

    """

    DVCSOutputOption.__init__(self)
    self.dump_filename = dump_filename
    self.revision_writer = revision_writer

    self.author_transforms = self.normalize_author_transforms(
        author_transforms
        )

    self.tie_tag_fixup_branches = tie_tag_fixup_branches

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

  def register_artifacts(self, which_pass):
    """Register the artifacts of this option and of its revision writer."""

    DVCSOutputOption.register_artifacts(self, which_pass)
    self.revision_writer.register_artifacts(which_pass)

  def check_symbols(self, symbol_map):
    # FIXME: What constraints does git impose on symbols?
    pass

  def setup(self, svn_rev_count):
    """Open the dump file and start the revision writer.

    The file stays open until cleanup() is called."""

    DVCSOutputOption.setup(self, svn_rev_count)
    self.f = open(self.dump_filename, 'wb')

    # The youngest revnum that has been committed so far:
    self._youngest = 0

    # A map {lod : [(revnum, mark)]} giving each of the revision
    # numbers in which there was a commit to lod, and the mark active
    # at the end of the revnum.
    self._marks = {}

    self.revision_writer.start(self._mirror, self.f)

  def _create_commit_mark(self, lod, revnum):
    """Generate a new mark, record it for LOD at REVNUM, and return it."""

    mark = self._mark_generator.gen_id()
    self._set_lod_mark(lod, revnum, mark)
    return mark

  def _set_lod_mark(self, lod, revnum, mark):
    """Record MARK as the status of LOD for REVNUM.

    If there is already an entry for REVNUM, overwrite it.  If not,
    append a new entry to the self._marks list for LOD."""

    assert revnum >= self._youngest
    entry = (revnum, mark)
    try:
      modifications = self._marks[lod]
    except KeyError:
      # This LOD hasn't appeared before; create a new list and add the
      # entry:
      self._marks[lod] = [entry]
    else:
      # A record exists, so it necessarily has at least one element:
      if modifications[-1][0] == revnum:
        modifications[-1] = entry
      else:
        modifications.append(entry)
    self._youngest = revnum

  def _get_author(self, svn_commit):
    """Return the author to be used for SVN_COMMIT.

    Return the author as a UTF-8 string in the form needed by git
    fast-import; that is, 'name <email>'."""

    cvs_author = svn_commit.get_author()
    return self._map_author(cvs_author)

  def _map_author(self, cvs_author):
    """Return the transformed form of CVS_AUTHOR.

    Authors without an entry in author_transforms are rendered with an
    empty email address."""

    return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

  @staticmethod
  def _get_log_msg(svn_commit):
    """Return the log message of SVN_COMMIT."""

    return svn_commit.get_log_msg()

  def process_initial_project_commit(self, svn_commit):
    """Record SVN_COMMIT in the mirror; nothing is written to the dump."""

    self._mirror.start_commit(svn_commit.revnum)
    self._mirror.end_commit()

  def process_primary_commit(self, svn_commit):
    """Write the commit for SVN_COMMIT, which must affect exactly one LOD.

    Raise InternalError if the CVS items of the commit belong to zero
    or to more than one LOD."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    lods = set()
    for cvs_rev in svn_commit.get_cvs_items():
      lods.add(cvs_rev.lod)
    if len(lods) != 1:
      raise InternalError('Commit affects %d LODs' % (len(lods),))
    lod = lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    if isinstance(lod, Trunk):
      # FIXME: is this correct?:
      self.f.write('commit refs/heads/master\n')
    else:
      self.f.write('commit refs/heads/%s\n' % (lod.name,))
    self.f.write(
        'mark :%d\n'
        % (self._create_commit_mark(lod, svn_commit.revnum),)
        )
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    for cvs_rev in svn_commit.get_cvs_items():
      self.revision_writer.process_revision(cvs_rev, post_commit=False)

    self.f.write('\n')
    self._mirror.end_commit()

  def process_post_commit(self, svn_commit):
    """Write the post-commit for SVN_COMMIT as a merge into master.

    All revisions of the commit must come from a single source LOD;
    otherwise InternalError is raised."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    source_lods = set()
    for cvs_rev in svn_commit.cvs_revs:
      source_lods.add(cvs_rev.lod)
    if len(source_lods) != 1:
      raise InternalError('Commit is from %d LODs' % (len(source_lods),))
    source_lod = source_lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    # FIXME: is this correct?:
    self.f.write('commit refs/heads/master\n')
    self.f.write(
        'mark :%d\n'
        % (self._create_commit_mark(None, svn_commit.revnum),)
        )
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    self.f.write(
        'merge :%d\n'
        % (self._get_source_mark(source_lod, svn_commit.revnum),)
        )
    for cvs_rev in svn_commit.cvs_revs:
      self.revision_writer.process_revision(cvs_rev, post_commit=True)

    self.f.write('\n')
    self._mirror.end_commit()

  def _get_source_mark(self, source_lod, revnum):
    """Return the mark active on SOURCE_LOD at the end of REVNUM.

    Raise KeyError if no mark was ever recorded for SOURCE_LOD."""

    modifications = self._marks[source_lod]
    # (revnum + 1,) sorts before every (revnum + 1, mark) entry, so the
    # element preceding the insertion point is the last one recorded at
    # or before REVNUM.
    # NOTE(review): if every entry is younger than REVNUM the index is
    # -1, which selects the newest entry -- confirm that callers never
    # ask for such a REVNUM.
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    (revnum, mark) = modifications[i]
    return mark

  def describe_lod_to_user(self, lod):
    """This needs to make sense to users of the fastimported result."""
    if isinstance(lod, Trunk):
      return 'master'
    else:
      return lod.name

  def _describe_commit(self, svn_commit, lod):
    """Return a one-line description of SVN_COMMIT on LOD.

    The description consists of the user-visible LOD name, the commit
    date in UTC, the author (without an empty email part) and the first
    line of the log message."""

    author = self._map_author(svn_commit.get_author())
    if author.endswith(" <>"):
      author = author[:-3]
    date = time.strftime(
        "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
        )
    log_msg = svn_commit.get_log_msg()
    if log_msg.find('\n') != -1:
      log_msg = log_msg[:log_msg.index('\n')]
    return "%s %s %s '%s'" % (
        self.describe_lod_to_user(lod), date, author, log_msg,)

  def _process_symbol_commit(
        self, svn_commit, git_branch, source_groups, mark=None
        ):
    """Write a fixup commit for the symbol of SVN_COMMIT to GIT_BRANCH.

    SOURCE_GROUPS is a sequence of (source_lod, source_revnum,
    cvs_symbols) tuples; the first one is treated as the primary
    parent when the symbol is created for the first time.

    MARK is the mark to give the commit.  If it is None (the default),
    a new mark is created for the symbol *after* it has been determined
    whether this is the initial creation of the symbol.  Callers should
    not create the mark themselves, because doing so registers the
    symbol in self._marks and makes every commit look like a fixup of
    an existing LOD.

    Return the mark of the commit that was written.  Raise
    InternalError if the primary source of an initial creation does not
    exist in the mirror."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    # There are two distinct cases we need to care for here:
    #  1. initial creation of a LOD
    #  2. fixup of an existing LOD to include more files, because the LOD in
    #     CVS was created piecemeal over time, with intervening commits

    # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
    # might be technically more correct (though _get_lod_history is currently
    # underscore-private)
    is_initial_lod_creation = svn_commit.symbol not in self._marks

    # Create the mark only after the check above, since creating it
    # adds the symbol to self._marks:
    if mark is None:
      mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)

    if is_initial_lod_creation:
      # Get the primary parent
      (p_source_lod, p_source_revnum, _p_cvs_symbols) = source_groups[0]
      try:
        p_source_node = self._mirror.get_old_lod_directory(
            p_source_lod, p_source_revnum
            )
      except KeyError:
        raise InternalError('Source %r does not exist' % (p_source_lod,))
      cvs_files_to_delete = set(self._get_all_files(p_source_node))

      for (source_lod, source_revnum, cvs_symbols,) in source_groups:
        for cvs_symbol in cvs_symbols:
          cvs_files_to_delete.discard(cvs_symbol.cvs_file)

      # Sort once so that the log trailer and the 'D' commands list
      # the files in the same, reproducible order:
      files_to_delete = sorted(cvs_files_to_delete)

      # The primary parent is described as the sprout source, so only
      # the remaining groups are cherrypicks:
      cherrypick_groups = source_groups[1:]
    else:
      files_to_delete = []
      cherrypick_groups = source_groups

    # Write a trailer to the log message which describes the cherrypicks that
    # make up this symbol creation.
    log_parts = [log_msg, "\n"]
    if is_initial_lod_creation:
      log_parts.append(
          "\nSprout from %s" % (
              self._describe_commit(
                  Ctx()._persistence_manager.get_svn_commit(p_source_revnum),
                  p_source_lod
                  ),
              )
          )
    for (source_lod, source_revnum, cvs_symbols,) in cherrypick_groups:
      log_parts.append(
          "\nCherrypick from %s:" % (
              self._describe_commit(
                  Ctx()._persistence_manager.get_svn_commit(source_revnum),
                  source_lod
                  ),
              )
          )
      for cvs_symbol in cvs_symbols:
        log_parts.append("\n %s" % (cvs_symbol.cvs_file.cvs_path,))
    if files_to_delete:
      log_parts.append("\nDelete:")
      for cvs_file in files_to_delete:
        log_parts.append("\n %s" % (cvs_file.cvs_path,))
    log_msg = "".join(log_parts)

    self.f.write('commit %s\n' % (git_branch,))
    self.f.write('mark :%d\n' % (mark,))
    self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))

    # Only record actual DVCS ancestry for the primary sprout parent,
    # all the rest are effectively cherrypicks.
    if is_initial_lod_creation:
      self.f.write(
          'from :%d\n'
          % (self._get_source_mark(p_source_lod, p_source_revnum),)
          )

    for (source_lod, source_revnum, cvs_symbols,) in source_groups:
      for cvs_symbol in cvs_symbols:
        self.revision_writer.branch_file(cvs_symbol)

    # Files of the primary parent that are not part of the symbol
    # (empty unless this is the initial creation):
    for cvs_file in files_to_delete:
      self.f.write('D %s\n' % (cvs_file.cvs_path,))

    self.f.write('\n')
    return mark

  def process_branch_commit(self, svn_commit):
    """Create or fix up the branch that is the symbol of SVN_COMMIT.

    A branch that is a simple copy of one source is created with a
    'reset' command; anything else is written as a fixup commit."""

    self._mirror.start_commit(svn_commit.revnum)

    source_groups = list(self._get_source_groups(svn_commit))
    if self._is_simple_copy(svn_commit, source_groups):
      (source_lod, source_revnum, cvs_symbols) = source_groups[0]
      Log().debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      Log().debug(
          '%s will be created via fixup commit(s)' % (svn_commit.symbol,)
          )
      # The commit mark is created inside _process_symbol_commit():
      self._process_symbol_commit(
          svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
          source_groups,
          )

    self._mirror.end_commit()

  def _set_symbol(self, symbol, mark):
    """Write a 'reset' command that points the ref of SYMBOL at MARK.

    Raise InternalError if SYMBOL is neither a Branch nor a Tag."""

    if isinstance(symbol, Branch):
      category = 'heads'
    elif isinstance(symbol, Tag):
      category = 'tags'
    else:
      raise InternalError()
    self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
    self.f.write('from :%d\n' % (mark,))

  def get_tag_fixup_branch_name(self, svn_commit):
    # The branch name to use for the "tag fixup branches".  The
    # git-fast-import documentation suggests using 'TAG_FIXUP'
    # (outside of the refs/heads namespace), but this is currently
    # broken.  Use a name containing '.', which is not allowed in CVS
    # symbols, to avoid conflicts (though of course a conflict could
    # still result if the user requests symbol transformations).
    return 'refs/heads/TAG.FIXUP'

  def process_tag_commit(self, svn_commit):
    """Create the tag that is the symbol of SVN_COMMIT.

    A tag that is a simple copy of one source is created with a
    'reset' command.  Otherwise the content is committed to a fixup
    branch, the tag is set to that commit and the fixup branch is
    reset.  If tie_tag_fixup_branches is set, an additional commit on
    the source branch merges the fixup commit."""

    # FIXME: For now we create a fixup branch with the same name as
    # the tag, then the tag.  We never delete the fixup branch.
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = list(self._get_source_groups(svn_commit))
    if self._is_simple_copy(svn_commit, source_groups):
      (source_lod, source_revnum, cvs_symbols) = source_groups[0]
      Log().debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      Log().debug(
          '%s will be created via a fixup branch' % (svn_commit.symbol,)
          )

      fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

      # Create the fixup branch (which might involve making more than
      # one commit).  The commit mark is created inside
      # _process_symbol_commit():
      mark = self._process_symbol_commit(
          svn_commit, fixup_branch_name, source_groups
          )

      # Store the mark of the last commit to the fixup branch as the
      # value of the tag:
      self._set_symbol(svn_commit.symbol, mark)
      self.f.write('reset %s\n' % (fixup_branch_name,))
      self.f.write('\n')

      if self.tie_tag_fixup_branches:
        source_lod = source_groups[0][0]
        # A source without a 'name' attribute is written as master:
        source_lod_git_branch = \
            'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

        mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
        author = self._map_author(Ctx().username)
        log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

        self.f.write('commit %s\n' % (source_lod_git_branch,))
        self.f.write('mark :%d\n' % (mark2,))
        self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

        self.f.write(
            'merge :%d\n'
            % (mark,)
            )

        self.f.write('\n')

    self._mirror.end_commit()

  def _get_log_msg_for_ancestry_tie(self, svn_commit):
    """Return the wrapped log message for an ancestry-tie commit."""

    return Ctx().text_wrapper.fill(
        Ctx().tie_tag_ancestry_message % {
            'symbol_name' : svn_commit.symbol.name,
            }
        )

  def cleanup(self):
    """Finish the revision writer and close the dump file.

    The file is closed even if finishing the revision writer fails."""

    DVCSOutputOption.cleanup(self)
    try:
      self.revision_writer.finish()
    finally:
      self.f.close()
      del self.f