Use ctx.tmpdir consistently in cvs2git-example.options.
[cvs2svn.git] / cvs2svn_lib / git_output_option.py
blobbc8a684df1f370e47b926d73c963db6f651c0982
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
23 """
25 import sys
26 import bisect
27 import time
28 import shutil
30 from cvs2svn_lib import config
31 from cvs2svn_lib.common import InternalError
32 from cvs2svn_lib.log import logger
33 from cvs2svn_lib.context import Ctx
34 from cvs2svn_lib.symbol import Trunk
35 from cvs2svn_lib.symbol import Branch
36 from cvs2svn_lib.symbol import Tag
37 from cvs2svn_lib.cvs_item import CVSSymbol
38 from cvs2svn_lib.dvcs_common import DVCSOutputOption
39 from cvs2svn_lib.dvcs_common import MirrorUpdater
40 from cvs2svn_lib.key_generator import KeyGenerator
41 from cvs2svn_lib.artifact_manager import artifact_manager
class GitRevisionWriter(MirrorUpdater):
    """Base class for writers that emit per-file fast-import commands.

    Every operation is first forwarded to MirrorUpdater and then
    reflected in the output stream that was handed to start().
    Subclasses decide how file content is referenced by implementing
    _modify_file()."""

    def start(self, mirror, f):
        """Start updating MIRROR and remember F as the output stream."""

        MirrorUpdater.start(self, mirror)
        self.f = f

    def _modify_file(self, cvs_item, post_commit):
        """Write the command that records the content of CVS_ITEM.

        This base implementation only raises NotImplementedError;
        subclasses are expected to override it."""

        raise NotImplementedError()

    def add_file(self, cvs_rev, post_commit):
        """Record the addition of CVS_REV in the mirror and the output."""

        MirrorUpdater.add_file(self, cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def modify_file(self, cvs_rev, post_commit):
        """Record the modification CVS_REV in the mirror and the output."""

        MirrorUpdater.modify_file(self, cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def delete_file(self, cvs_rev, post_commit):
        """Record the deletion CVS_REV and write a 'D <path>' command."""

        MirrorUpdater.delete_file(self, cvs_rev, post_commit)
        deleted_path = cvs_rev.cvs_file.cvs_path
        self.f.write('D %s\n' % (deleted_path,))

    def branch_file(self, cvs_symbol):
        """Record CVS_SYMBOL in the mirror and write its file content.

        The content command is always written with post_commit=False."""

        MirrorUpdater.branch_file(self, cvs_symbol)
        self._modify_file(cvs_symbol, post_commit=False)

    def finish(self):
        """Finish the mirror update and forget the output stream."""

        MirrorUpdater.finish(self)
        del self.f
class GitRevisionMarkWriter(GitRevisionWriter):
    """Revision writer that refers to file contents by mark.

    Each 'M' command names the content via the item's
    revision_reader_token, written as ':<mark>'.  When the revision
    collector has no blob_filename of its own, the blob data is taken
    from the temporary file config.GIT_BLOB_DATAFILE and copied into
    the output stream when writing starts."""

    def register_artifacts(self, which_pass):
        """Register the artifacts needed by this writer in WHICH_PASS.

        The temporary blob file is requested only if the revision
        collector was not given an explicit blob filename."""

        GitRevisionWriter.register_artifacts(self, which_pass)
        if Ctx().revision_collector.blob_filename is None:
            artifact_manager.register_temp_file_needed(
                config.GIT_BLOB_DATAFILE, which_pass,
                )

    def start(self, mirror, f):
        """Start writing to F, first copying any temporary blob data."""

        GitRevisionWriter.start(self, mirror, f)
        if Ctx().revision_collector.blob_filename is None:
            # The revision collector wrote the blobs to a temporary file;
            # copy them into f:
            logger.normal('Copying blob data to output')
            blobf = open(
                artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE), 'rb',
                )
            # Close the temporary file even if the copy fails part-way,
            # so that the handle is not leaked:
            try:
                shutil.copyfileobj(blobf, f)
            finally:
                blobf.close()

    def _modify_file(self, cvs_item, post_commit):
        """Write an 'M <mode> :<mark> <path>' command for CVS_ITEM.

        POST_COMMIT is accepted for interface compatibility but is not
        used here."""

        if cvs_item.cvs_file.executable:
            mode = '100755'
        else:
            mode = '100644'

        self.f.write(
            'M %s :%d %s\n'
            % (mode, cvs_item.revision_reader_token,
               cvs_item.cvs_file.cvs_path,)
            )
class GitRevisionInlineWriter(GitRevisionWriter):
    """Revision writer that embeds file contents directly in the output.

    Each 'M' command uses the 'inline' form and is followed by a
    'data' section holding the text obtained from the revision
    reader."""

    def __init__(self, revision_reader):
        """Remember REVISION_READER, the source of file contents."""

        self.revision_reader = revision_reader

    def register_artifacts(self, which_pass):
        """Register this writer's and the reader's artifacts."""

        GitRevisionWriter.register_artifacts(self, which_pass)
        self.revision_reader.register_artifacts(which_pass)

    def start(self, mirror, f):
        """Start writing to F and start the revision reader."""

        GitRevisionWriter.start(self, mirror, f)
        self.revision_reader.start()

    def _modify_file(self, cvs_item, post_commit):
        """Write an inline 'M' command plus the content of CVS_ITEM.

        POST_COMMIT is accepted for interface compatibility but is not
        used here."""

        mode = '100644'
        if cvs_item.cvs_file.executable:
            mode = '100755'

        path = cvs_item.cvs_file.cvs_path
        self.f.write('M %s inline %s\n' % (mode, path,))

        # A symbol has no content of its own; use the revision that it
        # was created from:
        cvs_rev = cvs_item
        if isinstance(cvs_item, CVSSymbol):
            cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)

        # FIXME: We have to decide what to do about keyword substitution
        # and eol_style here:
        content = self.revision_reader.get_content(cvs_rev)

        out = self.f
        out.write('data %d\n' % (len(content),))
        out.write(content)
        out.write('\n')

    def finish(self):
        """Finish writing and then finish the revision reader."""

        GitRevisionWriter.finish(self)
        self.revision_reader.finish()
class GitOutputOption(DVCSOutputOption):
    """An OutputOption that outputs to a git-fast-import formatted file.

    Members:

      dump_filename -- (string or None) the name of the file to which
          the git-fast-import commands for defining revisions will be
          written.  If None, the data will be written to stdout.

      author_transforms -- a map from CVS author names to git full name
          and email address.  See
          DVCSOutputOption.normalize_author_transforms() for information
          about the form of this parameter.

    """

    name = "Git"

    # The first mark number used for git-fast-import commit marks.  This
    # value needs to be large to avoid conflicts with blob marks.
    _first_commit_mark = 1000000000

    def __init__(
          self, revision_writer,
          dump_filename=None,
          author_transforms=None,
          tie_tag_fixup_branches=False,
          ):
        """Constructor.

        REVISION_WRITER is a GitRevisionWriter that is used to output
        either the content of revisions or a mark that was previously
        used to label a blob.

        DUMP_FILENAME is the name of the file to which the
        git-fast-import commands for defining revisions should be
        written.  (Please note that depending on the style of revision
        writer, the actual file contents might not be written to this
        file.)  If it is None, then the output is written to stdout.

        AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
        CVS author names to git full name and email address.  All of the
        contents should either be Unicode strings or 8-bit strings
        encoded as UTF-8.

        TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
        fixup branch, it should be pseudo-merged (ancestry linked but no
        content changes) back into its source branch, to dispose of the
        open head.

        """

        DVCSOutputOption.__init__(self)
        self.dump_filename = dump_filename
        self.revision_writer = revision_writer

        self.author_transforms = self.normalize_author_transforms(
            author_transforms
            )

        self.tie_tag_fixup_branches = tie_tag_fixup_branches

        self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

    def register_artifacts(self, which_pass):
        """Register the artifacts of this option and of its writer."""

        DVCSOutputOption.register_artifacts(self, which_pass)
        self.revision_writer.register_artifacts(which_pass)

    def check_symbols(self, symbol_map):
        """Accept every symbol; no git-specific checks are implemented."""

        # FIXME: What constraints does git impose on symbols?
        pass

    def setup(self, svn_rev_count):
        """Open the output stream and reset the mark bookkeeping."""

        DVCSOutputOption.setup(self, svn_rev_count)
        if self.dump_filename is None:
            self.f = sys.stdout
        else:
            self.f = open(self.dump_filename, 'wb')

        # The youngest revnum that has been committed so far:
        self._youngest = 0

        # A map {lod : [(revnum, mark)]} giving each of the revision
        # numbers in which there was a commit to lod, and the mark active
        # at the end of the revnum.
        self._marks = {}

        self.revision_writer.start(self._mirror, self.f)

    def _create_commit_mark(self, lod, revnum):
        """Generate a new mark, record it for LOD at REVNUM, return it."""

        mark = self._mark_generator.gen_id()
        self._set_lod_mark(lod, revnum, mark)
        return mark

    def _set_lod_mark(self, lod, revnum, mark):
        """Record MARK as the status of LOD for REVNUM.

        If there is already an entry for REVNUM, overwrite it.  If not,
        append a new entry to the self._marks list for LOD."""

        assert revnum >= self._youngest
        entry = (revnum, mark)
        try:
            modifications = self._marks[lod]
        except KeyError:
            # This LOD hasn't appeared before; create a new list and add
            # the entry:
            self._marks[lod] = [entry]
        else:
            # A record exists, so it necessarily has at least one element:
            if modifications[-1][0] == revnum:
                modifications[-1] = entry
            else:
                modifications.append(entry)
        self._youngest = revnum

    def _get_author(self, svn_commit):
        """Return the author to be used for SVN_COMMIT.

        Return the author as a UTF-8 string in the form needed by git
        fast-import; that is, 'name <email>'."""

        cvs_author = svn_commit.get_author()
        return self._map_author(cvs_author)

    def _map_author(self, cvs_author):
        """Return the git identity for CVS_AUTHOR.

        Authors missing from self.author_transforms are mapped to
        '<cvs_author> <>', i.e., with an empty email address."""

        return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

    @staticmethod
    def _get_log_msg(svn_commit):
        """Return the log message of SVN_COMMIT."""

        return svn_commit.get_log_msg()

    def _write_commit_metadata(self, mark, author, date, log_msg):
        """Write the mark, committer, and log message of a commit.

        This is meant to be called directly after the 'commit <ref>'
        line has been written.  DATE is written with '%d' followed by a
        fixed '+0000' offset."""

        self.f.write('mark :%d\n' % (mark,))
        self.f.write('committer %s %d +0000\n' % (author, date,))
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

    def process_initial_project_commit(self, svn_commit):
        """Record SVN_COMMIT in the mirror; nothing is written for it."""

        self._mirror.start_commit(svn_commit.revnum)
        self._mirror.end_commit()

    def process_primary_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on the single LOD it affects.

        Raise InternalError if the commit's items do not all belong to
        exactly one LOD."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        lods = set(cvs_rev.lod for cvs_rev in svn_commit.get_cvs_items())
        if len(lods) != 1:
            raise InternalError('Commit affects %d LODs' % (len(lods),))
        lod = lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        if isinstance(lod, Trunk):
            # FIXME: is this correct?:
            self.f.write('commit refs/heads/master\n')
        else:
            self.f.write('commit refs/heads/%s\n' % (lod.name,))
        mark = self._create_commit_mark(lod, svn_commit.revnum)
        logger.normal(
            'Writing commit r%d on %s (mark :%d)'
            % (svn_commit.revnum, lod, mark,)
            )
        self._write_commit_metadata(mark, author, svn_commit.date, log_msg)
        for cvs_rev in svn_commit.get_cvs_items():
            self.revision_writer.process_revision(cvs_rev, post_commit=False)

        self.f.write('\n')
        self._mirror.end_commit()

    def process_post_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on master merging its source LOD.

        Raise InternalError if the commit's revisions do not all come
        from exactly one LOD."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        source_lods = set(cvs_rev.lod for cvs_rev in svn_commit.cvs_revs)
        if len(source_lods) != 1:
            raise InternalError(
                'Commit is from %d LODs' % (len(source_lods),)
                )
        source_lod = source_lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        # FIXME: is this correct?:
        self.f.write('commit refs/heads/master\n')
        # NOTE(review): the mark is filed under the key None, whereas
        # process_primary_commit() files trunk marks under the Trunk
        # object itself -- confirm that this is intended.
        mark = self._create_commit_mark(None, svn_commit.revnum)
        logger.normal(
            'Writing post-commit r%d on Trunk (mark :%d)'
            % (svn_commit.revnum, mark,)
            )
        self._write_commit_metadata(mark, author, svn_commit.date, log_msg)
        self.f.write(
            'merge :%d\n'
            % (self._get_source_mark(source_lod, svn_commit.revnum),)
            )
        for cvs_rev in svn_commit.cvs_revs:
            self.revision_writer.process_revision(cvs_rev, post_commit=True)

        self.f.write('\n')
        self._mirror.end_commit()

    def _get_source_mark(self, source_lod, revnum):
        """Return the mark active on SOURCE_LOD at the end of REVNUM.

        Raise KeyError if no mark was ever recorded for SOURCE_LOD."""

        modifications = self._marks[source_lod]
        # The 1-tuple (revnum + 1,) sorts before every (revnum + 1, mark)
        # entry, so the bisection yields the number of entries whose
        # revnum is <= REVNUM; the entry just before that is the one
        # wanted.
        # NOTE(review): if no entry has revnum <= REVNUM, the index is -1
        # and the youngest entry is returned -- confirm that callers can
        # never get here in that state.
        i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
        (revnum, mark) = modifications[i]
        return mark

    def describe_lod_to_user(self, lod):
        """This needs to make sense to users of the fastimported result."""

        if isinstance(lod, Trunk):
            return 'master'
        else:
            return lod.name

    def _describe_commit(self, svn_commit, lod):
        """Return a one-line description of SVN_COMMIT on LOD.

        The description consists of the user-visible LOD name, the
        commit date formatted as UTC, the mapped author (without an
        empty ' <>' email suffix), and the first line of the log
        message in single quotes."""

        author = self._map_author(svn_commit.get_author())
        if author.endswith(" <>"):
            author = author[:-3]
        date = time.strftime(
            "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
            )
        # Only the first line of the log message is used:
        log_msg = svn_commit.get_log_msg().split('\n', 1)[0]
        return "%s %s %s '%s'" % (
            self.describe_lod_to_user(lod), date, author, log_msg,)

    def _process_symbol_commit(self, svn_commit, git_branch, source_groups):
        """Write a commit on GIT_BRANCH that creates or fixes up a symbol.

        SOURCE_GROUPS is a sequence of (revnum, lod, cvs_symbols) tuples
        describing where the symbol's files come from.  Return the mark
        of the commit that was written.

        Raise InternalError if the symbol is being created and its
        primary source does not exist in the mirror."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        # There are two distinct cases we need to care for here:
        #  1. initial creation of a LOD
        #  2. fixup of an existing LOD to include more files, because the
        #     LOD in CVS was created piecemeal over time, with
        #     intervening commits

        # We look at _marks here, but
        # self._mirror._get_lod_history(lod).exists() might be
        # technically more correct (though _get_lod_history is currently
        # underscore-private)
        is_initial_lod_creation = svn_commit.symbol not in self._marks

        # Create the mark, only after the check above
        mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)

        # Paths that exist in the primary parent but are not part of the
        # symbol.  This stays empty for fixups of an existing LOD.
        deleted_paths = []

        if is_initial_lod_creation:
            # Get the primary parent
            p_source_revnum, p_source_lod, p_cvs_symbols = source_groups[0]
            try:
                p_source_node = self._mirror.get_old_lod_directory(
                    p_source_lod, p_source_revnum
                    )
            except KeyError:
                raise InternalError(
                    'Source %r does not exist' % (p_source_lod,)
                    )
            cvs_files_to_delete = set(self._get_all_files(p_source_node))

            for (source_revnum, source_lod, cvs_symbols,) in source_groups:
                for cvs_symbol in cvs_symbols:
                    cvs_files_to_delete.discard(cvs_symbol.cvs_file)

            # Sorted once, so that the log message and the 'D' commands
            # written below list the paths in the same, deterministic
            # order:
            deleted_paths = sorted(
                cvs_file.cvs_path for cvs_file in cvs_files_to_delete
                )

            # The primary parent becomes real ancestry; only the
            # remaining groups are cherrypicks.
            cherrypick_groups = source_groups[1:]
        else:
            cherrypick_groups = source_groups

        # Write a trailer to the log message which describes the
        # cherrypicks that make up this symbol creation.
        trailer = ["\n"]
        if is_initial_lod_creation:
            trailer.append(
                "\nSprout from %s" % (
                    self._describe_commit(
                        Ctx()._persistence_manager.get_svn_commit(
                            p_source_revnum
                            ),
                        p_source_lod
                        ),
                    )
                )
        for (source_revnum, source_lod, cvs_symbols,) in cherrypick_groups:
            trailer.append(
                "\nCherrypick from %s:" % (
                    self._describe_commit(
                        Ctx()._persistence_manager.get_svn_commit(
                            source_revnum
                            ),
                        source_lod
                        ),
                    )
                )
            for cvs_path in sorted(
                  cvs_symbol.cvs_file.cvs_path for cvs_symbol in cvs_symbols
                  ):
                trailer.append("\n %s" % (cvs_path,))
        if deleted_paths:
            trailer.append("\nDelete:")
            for cvs_path in deleted_paths:
                trailer.append("\n %s" % (cvs_path,))
        log_msg += "".join(trailer)

        self.f.write('commit %s\n' % (git_branch,))
        self._write_commit_metadata(mark, author, svn_commit.date, log_msg)

        # Only record actual DVCS ancestry for the primary sprout parent,
        # all the rest are effectively cherrypicks.
        if is_initial_lod_creation:
            self.f.write(
                'from :%d\n'
                % (self._get_source_mark(p_source_lod, p_source_revnum),)
                )

        for (source_revnum, source_lod, cvs_symbols,) in source_groups:
            for cvs_symbol in cvs_symbols:
                self.revision_writer.branch_file(cvs_symbol)

        for cvs_path in deleted_paths:
            self.f.write('D %s\n' % (cvs_path,))

        self.f.write('\n')
        return mark

    def _create_symbol_by_simple_copy(self, svn_commit, source_group):
        """Point the symbol of SVN_COMMIT at the commit it copies.

        SOURCE_GROUP is the single (revnum, lod, cvs_symbols) tuple that
        the symbol is copied from.  No new commit is written: the ref is
        reset to the source's mark, the mirror copies the LOD, and the
        same mark is recorded for the symbol."""

        (source_revnum, source_lod, cvs_symbols) = source_group
        logger.debug(
            '%s will be created via a simple copy from %s:r%d'
            % (svn_commit.symbol, source_lod, source_revnum,)
            )
        mark = self._get_source_mark(source_lod, source_revnum)
        self._set_symbol(svn_commit.symbol, mark)
        self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
        self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)

    def process_branch_commit(self, svn_commit):
        """Create or fix up the branch described by SVN_COMMIT."""

        self._mirror.start_commit(svn_commit.revnum)

        source_groups = self._get_source_groups(svn_commit)
        if self._is_simple_copy(svn_commit, source_groups):
            self._create_symbol_by_simple_copy(svn_commit, source_groups[0])
        else:
            logger.debug(
                '%s will be created via fixup commit(s)'
                % (svn_commit.symbol,)
                )
            self._process_symbol_commit(
                svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
                source_groups,
                )

        self._mirror.end_commit()

    def _set_symbol(self, symbol, mark):
        """Write a 'reset' that points the ref of SYMBOL at MARK.

        Branches go to refs/heads and tags to refs/tags.  Raise
        InternalError if SYMBOL is neither a Branch nor a Tag."""

        if isinstance(symbol, Branch):
            category = 'heads'
        elif isinstance(symbol, Tag):
            category = 'tags'
        else:
            raise InternalError()
        self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
        self.f.write('from :%d\n' % (mark,))

    def get_tag_fixup_branch_name(self, svn_commit):
        """Return the ref used for tag fixup branches."""

        # The branch name to use for the "tag fixup branches".  The
        # git-fast-import documentation suggests using 'TAG_FIXUP'
        # (outside of the refs/heads namespace), but this is currently
        # broken.  Use a name containing '.', which is not allowed in CVS
        # symbols, to avoid conflicts (though of course a conflict could
        # still result if the user requests symbol transformations).
        return 'refs/heads/TAG.FIXUP'

    def process_tag_commit(self, svn_commit):
        """Create the tag described by SVN_COMMIT.

        A tag that is a simple copy just gets its ref pointed at the
        source commit.  Otherwise the tag content is committed on a
        fixup branch, the tag is set to that commit, and (if
        self.tie_tag_fixup_branches is set) the fixup commit is merged
        back into the source branch with an extra commit."""

        # FIXME: For now we create a fixup branch with the same name as
        # the tag, then the tag.  We never delete the fixup branch.
        self._mirror.start_commit(svn_commit.revnum)

        source_groups = self._get_source_groups(svn_commit)
        if self._is_simple_copy(svn_commit, source_groups):
            self._create_symbol_by_simple_copy(svn_commit, source_groups[0])
        else:
            logger.debug(
                '%s will be created via a fixup branch'
                % (svn_commit.symbol,)
                )

            fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

            # Create the fixup branch (which might involve making more
            # than one commit):
            mark = self._process_symbol_commit(
                svn_commit, fixup_branch_name, source_groups
                )

            # Store the mark of the last commit to the fixup branch as
            # the value of the tag:
            self._set_symbol(svn_commit.symbol, mark)
            self.f.write('reset %s\n' % (fixup_branch_name,))
            self.f.write('\n')

            if self.tie_tag_fixup_branches:
                source_lod = source_groups[0][1]
                # A source without a 'name' attribute is written to
                # master:
                source_lod_git_branch = \
                    'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

                mark2 = self._create_commit_mark(
                    source_lod, svn_commit.revnum
                    )
                author = self._map_author(Ctx().username)
                log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

                self.f.write('commit %s\n' % (source_lod_git_branch,))
                self._write_commit_metadata(
                    mark2, author, svn_commit.date, log_msg
                    )

                self.f.write(
                    'merge :%d\n'
                    % (mark,)
                    )

                self.f.write('\n')

        self._mirror.end_commit()

    def _get_log_msg_for_ancestry_tie(self, svn_commit):
        """Return the wrapped log message for an ancestry-tie commit.

        The message is Ctx().tie_tag_ancestry_message with
        'symbol_name' filled in, passed through Ctx().text_wrapper."""

        return Ctx().text_wrapper.fill(
            Ctx().tie_tag_ancestry_message % {
                'symbol_name' : svn_commit.symbol.name,
                }
            )

    def cleanup(self):
        """Finish the revision writer and release the output stream.

        The dump file (if one was opened) is closed and self.f is
        removed even when an earlier cleanup step raises, so that the
        file handle is not leaked.  stdout is never closed."""

        try:
            DVCSOutputOption.cleanup(self)
            self.revision_writer.finish()
        finally:
            if self.dump_filename is not None:
                self.f.close()
            del self.f