Bring CHANGES up to date.
[cvs2svn.git] / cvs2svn_lib / git_output_option.py
blob60a9c02d9b0f8763724058a36669553e4892e12c
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
23 """
25 import sys
26 import bisect
27 import time
28 import shutil
30 from cvs2svn_lib import config
31 from cvs2svn_lib.common import InternalError
32 from cvs2svn_lib.log import logger
33 from cvs2svn_lib.context import Ctx
34 from cvs2svn_lib.symbol import Trunk
35 from cvs2svn_lib.symbol import Branch
36 from cvs2svn_lib.symbol import Tag
37 from cvs2svn_lib.cvs_item import CVSSymbol
38 from cvs2svn_lib.dvcs_common import DVCSOutputOption
39 from cvs2svn_lib.dvcs_common import MirrorUpdater
40 from cvs2svn_lib.key_generator import KeyGenerator
41 from cvs2svn_lib.artifact_manager import artifact_manager
def cvs_item_is_executable(cvs_item):
    """Return True iff CVS_ITEM's file carries the 'svn:executable' property.

    The check is a plain membership test on the properties of
    CVS_ITEM.cvs_file; the value stored for the property is ignored."""

    file_properties = cvs_item.cvs_file.properties
    return 'svn:executable' in file_properties
class GitRevisionWriter(MirrorUpdater):
    """Base class for writers that emit file changes to a fast-import stream.

    Every operation first lets MirrorUpdater do its own processing and
    then writes the corresponding git-fast-import file command to the
    output stream handed to start().  Subclasses decide how file
    content is referenced by implementing _modify_file()."""

    def start(self, mirror, f):
        """Start the mirror updater and remember F as the output stream."""

        MirrorUpdater.start(self, mirror)
        self.f = f

    def _modify_file(self, cvs_item, post_commit):
        """Write the 'M' command for CVS_ITEM; must be overridden."""

        raise NotImplementedError()

    def add_file(self, cvs_rev, post_commit):
        """Record the addition of CVS_REV, then write its 'M' command."""

        MirrorUpdater.add_file(self, cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def modify_file(self, cvs_rev, post_commit):
        """Record the modification of CVS_REV, then write its 'M' command."""

        MirrorUpdater.modify_file(self, cvs_rev, post_commit)
        self._modify_file(cvs_rev, post_commit)

    def delete_file(self, cvs_rev, post_commit):
        """Record the deletion of CVS_REV, then write a 'D' command for it."""

        MirrorUpdater.delete_file(self, cvs_rev, post_commit)
        deleted_path = cvs_rev.cvs_file.cvs_path
        self.f.write('D %s\n' % (deleted_path,))

    def branch_file(self, cvs_symbol):
        """Record CVS_SYMBOL in the mirror, then write its 'M' command.

        Symbol fills are never post-commits, so post_commit is False."""

        MirrorUpdater.branch_file(self, cvs_symbol)
        self._modify_file(cvs_symbol, post_commit=False)

    def finish(self):
        """Finish the mirror updater and forget the output stream."""

        MirrorUpdater.finish(self)
        del self.f
class GitRevisionMarkWriter(GitRevisionWriter):
    """Revision writer that refers to file contents by blob mark.

    Each 'M' command names the blob through the mark stored in the
    item's revision_reader_token instead of carrying the content
    inline."""

    def register_artifacts(self, which_pass):
        """Register the temporary blob file if the collector wrote one.

        The temporary file is only needed when the revision collector
        has no explicit blob_filename."""

        GitRevisionWriter.register_artifacts(self, which_pass)
        if Ctx().revision_collector.blob_filename is None:
            artifact_manager.register_temp_file_needed(
                config.GIT_BLOB_DATAFILE, which_pass,
                )

    def start(self, mirror, f):
        """Start writing to F, first copying any temporary blob data into it."""

        GitRevisionWriter.start(self, mirror, f)
        if Ctx().revision_collector.blob_filename is None:
            # The revision collector wrote the blobs to a temporary file;
            # copy them into f:
            logger.normal('Copying blob data to output')
            blobf = open(
                artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE), 'rb',
                )
            try:
                shutil.copyfileobj(blobf, f)
            finally:
                # Release the handle even if the copy fails part-way.
                blobf.close()

    def _modify_file(self, cvs_item, post_commit):
        """Write an 'M' command that points at CVS_ITEM's blob mark."""

        if cvs_item_is_executable(cvs_item):
            mode = '100755'
        else:
            mode = '100644'

        self.f.write(
            'M %s :%d %s\n'
            % (mode, cvs_item.revision_reader_token,
               cvs_item.cvs_file.cvs_path,)
            )
class GitRevisionInlineWriter(GitRevisionWriter):
    """Revision writer that embeds file contents directly in each commit.

    The text of every revision is fetched from a revision reader and
    written as an 'inline' data section following the 'M' command."""

    def __init__(self, revision_reader):
        """Remember REVISION_READER, the source of revision contents."""

        self.revision_reader = revision_reader

    def register_artifacts(self, which_pass):
        """Register the artifacts of this writer and of its revision reader."""

        GitRevisionWriter.register_artifacts(self, which_pass)
        self.revision_reader.register_artifacts(which_pass)

    def start(self, mirror, f):
        """Start writing to F and start the revision reader."""

        GitRevisionWriter.start(self, mirror, f)
        self.revision_reader.start()

    def _modify_file(self, cvs_item, post_commit):
        """Write an inline 'M' command for CVS_ITEM followed by its content."""

        # Regular file unless the item is flagged as executable.
        mode = '100644'
        if cvs_item_is_executable(cvs_item):
            mode = '100755'

        path = cvs_item.cvs_file.cvs_path
        self.f.write(
            'M %s inline %s\n'
            % (mode, path,)
            )

        # A symbol has no content of its own; use the revision it was
        # created from.
        cvs_rev = cvs_item
        if isinstance(cvs_item, CVSSymbol):
            cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)

        # FIXME: We have to decide what to do about keyword substitution
        # and eol_style here:
        fulltext = self.revision_reader.get_content(cvs_rev)

        size = len(fulltext)
        self.f.write('data %d\n' % (size,))
        self.f.write(fulltext)
        self.f.write('\n')

    def finish(self):
        """Finish this writer, then the revision reader."""

        GitRevisionWriter.finish(self)
        self.revision_reader.finish()
class GitOutputOption(DVCSOutputOption):
    """An OutputOption that outputs to a git-fast-import formatted file.

    Members:

      dump_filename -- (string or None) the name of the file to which
          the git-fast-import commands for defining revisions will be
          written.  If None, the data will be written to stdout.

      author_transforms -- a map from CVS author names to git full name
          and email address.  See
          DVCSOutputOption.normalize_author_transforms() for information
          about the form of this parameter.

      tie_tag_fixup_branches -- (boolean) whether a finished tag fixup
          branch is pseudo-merged back into its source branch.

    """

    name = "Git"

    # The first mark number used for git-fast-import commit marks.  This
    # value needs to be large to avoid conflicts with blob marks.
    _first_commit_mark = 1000000000

    def __init__(
          self, revision_writer,
          dump_filename=None,
          author_transforms=None,
          tie_tag_fixup_branches=False,
          ):
        """Constructor.

        REVISION_WRITER is a GitRevisionWriter that is used to output
        either the content of revisions or a mark that was previously used
        to label a blob.

        DUMP_FILENAME is the name of the file to which the git-fast-import
        commands for defining revisions should be written.  (Please note
        that depending on the style of revision writer, the actual file
        contents might not be written to this file.)  If it is None, then
        the output is written to stdout.

        AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
        CVS author names to git full name and email address.  All of the
        contents should either be Unicode strings or 8-bit strings encoded
        as UTF-8.

        TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
        fixup branch, it should be pseudo-merged (ancestry linked but no
        content changes) back into its source branch, to dispose of the
        open head.

        """

        DVCSOutputOption.__init__(self)
        self.dump_filename = dump_filename
        self.revision_writer = revision_writer

        self.author_transforms = self.normalize_author_transforms(
            author_transforms
            )

        self.tie_tag_fixup_branches = tie_tag_fixup_branches

        self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

    def register_artifacts(self, which_pass):
        """Register the artifacts needed by this option and its writer."""

        DVCSOutputOption.register_artifacts(self, which_pass)
        self.revision_writer.register_artifacts(which_pass)

    def check_symbols(self, symbol_map):
        """Accept every symbol; no git-specific checks are implemented."""

        # FIXME: What constraints does git impose on symbols?
        pass

    def setup(self, svn_rev_count):
        """Open the output stream and reset the per-run bookkeeping."""

        DVCSOutputOption.setup(self, svn_rev_count)
        if self.dump_filename is None:
            self.f = sys.stdout
        else:
            self.f = open(self.dump_filename, 'wb')

        # The youngest revnum that has been committed so far:
        self._youngest = 0

        # A map {lod : [(revnum, mark)]} giving each of the revision
        # numbers in which there was a commit to lod, and the mark active
        # at the end of the revnum.
        self._marks = {}

        self.revision_writer.start(self._mirror, self.f)

    def _create_commit_mark(self, lod, revnum):
        """Generate a new mark, record it for LOD at REVNUM, and return it."""

        mark = self._mark_generator.gen_id()
        self._set_lod_mark(lod, revnum, mark)
        return mark

    def _set_lod_mark(self, lod, revnum, mark):
        """Record MARK as the status of LOD for REVNUM.

        If there is already an entry for REVNUM, overwrite it.  If not,
        append a new entry to the self._marks list for LOD."""

        assert revnum >= self._youngest
        entry = (revnum, mark)
        try:
            modifications = self._marks[lod]
        except KeyError:
            # This LOD hasn't appeared before; create a new list and add the
            # entry:
            self._marks[lod] = [entry]
        else:
            # A record exists, so it necessarily has at least one element:
            if modifications[-1][0] == revnum:
                modifications[-1] = entry
            else:
                modifications.append(entry)
        self._youngest = revnum

    def _get_author(self, svn_commit):
        """Return the author to be used for SVN_COMMIT.

        Return the author as a UTF-8 string in the form needed by git
        fast-import; that is, 'name <email>'."""

        cvs_author = svn_commit.get_author()
        return self._map_author(cvs_author)

    def _map_author(self, cvs_author):
        """Return the transformed form of CVS_AUTHOR.

        Authors without an entry in author_transforms are rendered as
        'CVS_AUTHOR <>'."""

        return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

    @staticmethod
    def _get_log_msg(svn_commit):
        """Return the log message of SVN_COMMIT."""

        return svn_commit.get_log_msg()

    def _write_commit_header(self, mark, author, date, log_msg):
        """Write the mark, committer, and log message lines of a commit.

        The 'commit <ref>' line itself must already have been written by
        the caller."""

        self.f.write('mark :%d\n' % (mark,))
        self.f.write('committer %s %d +0000\n' % (author, date,))
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

    def process_initial_project_commit(self, svn_commit):
        """Record SVN_COMMIT in the mirror; nothing is written to the stream."""

        self._mirror.start_commit(svn_commit.revnum)
        self._mirror.end_commit()

    def process_primary_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on the single LOD that it affects.

        Raise InternalError if the commit does not touch exactly one
        LOD."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        lods = set()
        for cvs_rev in svn_commit.get_cvs_items():
            lods.add(cvs_rev.lod)
        if len(lods) != 1:
            raise InternalError('Commit affects %d LODs' % (len(lods),))
        lod = lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        if isinstance(lod, Trunk):
            # FIXME: is this correct?:
            self.f.write('commit refs/heads/master\n')
        else:
            self.f.write('commit refs/heads/%s\n' % (lod.name,))
        mark = self._create_commit_mark(lod, svn_commit.revnum)
        logger.normal(
            'Writing commit r%d on %s (mark :%d)'
            % (svn_commit.revnum, lod, mark,)
            )
        self._write_commit_header(mark, author, svn_commit.date, log_msg)
        for cvs_rev in svn_commit.get_cvs_items():
            self.revision_writer.process_revision(cvs_rev, post_commit=False)

        self.f.write('\n')
        self._mirror.end_commit()

    def process_post_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on master merged from its source LOD.

        Raise InternalError if the revisions do not all come from a
        single LOD."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        source_lods = set()
        for cvs_rev in svn_commit.cvs_revs:
            source_lods.add(cvs_rev.lod)
        if len(source_lods) != 1:
            raise InternalError('Commit is from %d LODs' % (len(source_lods),))
        source_lod = source_lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        # FIXME: is this correct?:
        self.f.write('commit refs/heads/master\n')
        # NOTE(review): the mark is recorded under the key None rather
        # than under a Trunk object -- confirm that this is intended.
        mark = self._create_commit_mark(None, svn_commit.revnum)
        logger.normal(
            'Writing post-commit r%d on Trunk (mark :%d)'
            % (svn_commit.revnum, mark,)
            )
        self._write_commit_header(mark, author, svn_commit.date, log_msg)
        self.f.write(
            'merge :%d\n'
            % (self._get_source_mark(source_lod, svn_commit.revnum),)
            )
        for cvs_rev in svn_commit.cvs_revs:
            self.revision_writer.process_revision(cvs_rev, post_commit=True)

        self.f.write('\n')
        self._mirror.end_commit()

    def _get_source_mark(self, source_lod, revnum):
        """Return the mark active on SOURCE_LOD at the end of REVNUM."""

        modifications = self._marks[source_lod]
        # (revnum + 1,) sorts before every (revnum + 1, mark) entry, so the
        # element just to the left of the insertion point is the last
        # entry whose revision number is <= REVNUM.
        i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
        # NOTE(review): if every entry is younger than REVNUM then i is -1
        # and the newest entry is returned -- confirm that this cannot
        # happen or is acceptable.
        (revnum, mark) = modifications[i]
        return mark

    def describe_lod_to_user(self, lod):
        """This needs to make sense to users of the fastimported result."""

        if isinstance(lod, Trunk):
            return 'master'
        else:
            return lod.name

    def _describe_commit(self, svn_commit, lod):
        """Return a one-line description of SVN_COMMIT on LOD.

        The description consists of the user-visible LOD name, the commit
        date in UTC, the author (without an empty ' <>' email suffix),
        and the first line of the log message in single quotes."""

        author = self._map_author(svn_commit.get_author())
        if author.endswith(" <>"):
            author = author[:-3]
        date = time.strftime(
            "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
            )
        # Keep only the first line of the log message.
        log_msg = svn_commit.get_log_msg().split('\n', 1)[0]
        return "%s %s %s '%s'" % (
            self.describe_lod_to_user(lod), date, author, log_msg,)

    def _process_symbol_commit(self, svn_commit, git_branch, source_groups):
        """Write a commit on GIT_BRANCH that fills SVN_COMMIT's symbol.

        SOURCE_GROUPS is a sequence of (revnum, lod, cvs_symbols) tuples;
        the first one is treated as the primary parent.  Return the mark
        of the new commit.  Raise InternalError if the primary source
        does not exist in the mirror."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        # There are two distinct cases we need to care for here:
        #  1. initial creation of a LOD
        #  2. fixup of an existing LOD to include more files, because the
        #     LOD in CVS was created piecemeal over time, with intervening
        #     commits

        # We look at _marks here, but
        # self._mirror._get_lod_history(lod).exists() might be technically
        # more correct (though _get_lod_history is currently
        # underscore-private)
        is_initial_lod_creation = svn_commit.symbol not in self._marks

        # Create the mark, only after the check above
        mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)

        if is_initial_lod_creation:
            # Get the primary parent
            p_source_revnum, p_source_lod, p_cvs_symbols = source_groups[0]
            try:
                p_source_node = self._mirror.get_old_lod_directory(
                    p_source_lod, p_source_revnum
                    )
            except KeyError:
                raise InternalError(
                    'Source %r does not exist' % (p_source_lod,)
                    )
            # Start from every file of the primary parent and keep only
            # those that are not part of the symbol:
            cvs_files_to_delete = set(self._get_all_files(p_source_node))

            for (source_revnum, source_lod, cvs_symbols,) in source_groups:
                for cvs_symbol in cvs_symbols:
                    cvs_files_to_delete.discard(cvs_symbol.cvs_file)

        self.f.write('commit %s\n' % (git_branch,))
        self._write_commit_header(mark, author, svn_commit.date, log_msg)

        # Only record actual DVCS ancestry for the primary sprout parent,
        # all the rest are effectively cherrypicks.
        if is_initial_lod_creation:
            self.f.write(
                'from :%d\n'
                % (self._get_source_mark(p_source_lod, p_source_revnum),)
                )

        for (source_revnum, source_lod, cvs_symbols,) in source_groups:
            for cvs_symbol in cvs_symbols:
                self.revision_writer.branch_file(cvs_symbol)

        if is_initial_lod_creation:
            for cvs_file in cvs_files_to_delete:
                self.f.write('D %s\n' % (cvs_file.cvs_path,))

        self.f.write('\n')
        return mark

    def _copy_symbol_from_single_source(self, svn_commit, source_groups):
        """Create SVN_COMMIT's symbol as a plain reference to its source.

        Point the symbol's ref at the mark of the first source group,
        copy the source LOD in the mirror, and record the mark for the
        symbol.  Return the mark."""

        (source_revnum, source_lod, cvs_symbols) = source_groups[0]
        logger.debug(
            '%s will be created via a simple copy from %s:r%d'
            % (svn_commit.symbol, source_lod, source_revnum,)
            )
        mark = self._get_source_mark(source_lod, source_revnum)
        self._set_symbol(svn_commit.symbol, mark)
        self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
        self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
        return mark

    def process_branch_commit(self, svn_commit):
        """Create or fill the branch of SVN_COMMIT.

        Use a simple copy of the source when possible; otherwise write a
        fixup commit on the branch."""

        self._mirror.start_commit(svn_commit.revnum)

        source_groups = self._get_source_groups(svn_commit)
        if self._is_simple_copy(svn_commit, source_groups):
            self._copy_symbol_from_single_source(svn_commit, source_groups)
        else:
            logger.debug(
                '%s will be created via fixup commit(s)' % (svn_commit.symbol,)
                )
            self._process_symbol_commit(
                svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
                source_groups,
                )

        self._mirror.end_commit()

    def _set_symbol(self, symbol, mark):
        """Write a 'reset' that points SYMBOL's ref at MARK.

        Branches go under refs/heads and tags under refs/tags; any other
        kind of symbol raises InternalError."""

        if isinstance(symbol, Branch):
            category = 'heads'
        elif isinstance(symbol, Tag):
            category = 'tags'
        else:
            raise InternalError()
        self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
        self.f.write('from :%d\n' % (mark,))

    def get_tag_fixup_branch_name(self, svn_commit):
        """Return the ref name of the branch used for tag fixup commits."""

        # The branch name to use for the "tag fixup branches".  The
        # git-fast-import documentation suggests using 'TAG_FIXUP'
        # (outside of the refs/heads namespace), but this is currently
        # broken.  Use a name containing '.', which is not allowed in CVS
        # symbols, to avoid conflicts (though of course a conflict could
        # still result if the user requests symbol transformations).
        return 'refs/heads/TAG.FIXUP'

    def process_tag_commit(self, svn_commit):
        """Create the tag of SVN_COMMIT.

        Use a simple copy of the source when possible; otherwise commit
        the tag content on the tag fixup branch, point the tag at that
        commit, and reset the fixup branch.  If tie_tag_fixup_branches
        is set, also write a commit on the source branch that merges the
        fixup commit."""

        # FIXME: For now we create a fixup branch with the same name as
        # the tag, then the tag.  We never delete the fixup branch.
        self._mirror.start_commit(svn_commit.revnum)

        source_groups = self._get_source_groups(svn_commit)
        if self._is_simple_copy(svn_commit, source_groups):
            self._copy_symbol_from_single_source(svn_commit, source_groups)
        else:
            logger.debug(
                '%s will be created via a fixup branch' % (svn_commit.symbol,)
                )

            fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

            # Create the fixup branch (which might involve making more than
            # one commit):
            mark = self._process_symbol_commit(
                svn_commit, fixup_branch_name, source_groups
                )

            # Store the mark of the last commit to the fixup branch as the
            # value of the tag:
            self._set_symbol(svn_commit.symbol, mark)
            self.f.write('reset %s\n' % (fixup_branch_name,))
            self.f.write('\n')

            if self.tie_tag_fixup_branches:
                source_lod = source_groups[0][1]
                # A source without a 'name' attribute maps to master.
                source_lod_git_branch = \
                    'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

                mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
                author = self._map_author(Ctx().username)
                log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

                self.f.write('commit %s\n' % (source_lod_git_branch,))
                self._write_commit_header(
                    mark2, author, svn_commit.date, log_msg
                    )

                self.f.write(
                    'merge :%d\n'
                    % (mark,)
                    )

                self.f.write('\n')

        self._mirror.end_commit()

    def _get_log_msg_for_ancestry_tie(self, svn_commit):
        """Return the wrapped log message for an ancestry-tying commit."""

        return Ctx().text_wrapper.fill(
            Ctx().tie_tag_ancestry_message % {
                'symbol_name' : svn_commit.symbol.name,
                }
            )

    def cleanup(self):
        """Finish the revision writer and release the output stream.

        The dump file is closed (stdout is left open) and self.f is
        removed even if finishing raises an exception."""

        try:
            DVCSOutputOption.cleanup(self)
            self.revision_writer.finish()
        finally:
            if self.dump_filename is not None:
                self.f.close()
            del self.f