Don't add so much useless metadata to symbol commits
[cvs2svn.git] / cvs2svn_lib / git_output_option.py
blobf54c0a60a108ec10e7b73c184db33fd15bde351f
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
23 """
25 import sys
26 import bisect
27 import time
28 import shutil
30 from cvs2svn_lib import config
31 from cvs2svn_lib.common import InternalError
32 from cvs2svn_lib.log import logger
33 from cvs2svn_lib.context import Ctx
34 from cvs2svn_lib.symbol import Trunk
35 from cvs2svn_lib.symbol import Branch
36 from cvs2svn_lib.symbol import Tag
37 from cvs2svn_lib.cvs_item import CVSSymbol
38 from cvs2svn_lib.dvcs_common import DVCSOutputOption
39 from cvs2svn_lib.dvcs_common import MirrorUpdater
40 from cvs2svn_lib.key_generator import KeyGenerator
41 from cvs2svn_lib.artifact_manager import artifact_manager
class GitRevisionWriter(MirrorUpdater):
  """Base class for writers that emit file changes to a fast-import stream.

  Each mirror-update call is first forwarded to MirrorUpdater; the
  corresponding command is then written to the stream that was handed
  to start().  How the content of an added/modified file is expressed
  is left to subclasses, which must implement _modify_file()."""

  def start(self, mirror, f):
    """Start the underlying mirror updater and remember output stream F."""

    MirrorUpdater.start(self, mirror)
    self.f = f

  def _modify_file(self, cvs_item, post_commit):
    """Write the command that sets the content of CVS_ITEM's file.

    This base implementation always raises NotImplementedError."""

    raise NotImplementedError()

  def add_file(self, cvs_rev, post_commit):
    """Record the addition of CVS_REV, then write its content command."""

    MirrorUpdater.add_file(self, cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def modify_file(self, cvs_rev, post_commit):
    """Record the modification of CVS_REV, then write its content command."""

    MirrorUpdater.modify_file(self, cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def delete_file(self, cvs_rev, post_commit):
    """Record the deletion of CVS_REV and write a 'D' command for its path."""

    MirrorUpdater.delete_file(self, cvs_rev, post_commit)
    deleted_path = cvs_rev.cvs_file.cvs_path
    self.f.write('D %s\n' % (deleted_path,))

  def branch_file(self, cvs_symbol):
    """Record CVS_SYMBOL in the mirror, then write its content command.

    The content command is always written with post_commit=False."""

    MirrorUpdater.branch_file(self, cvs_symbol)
    self._modify_file(cvs_symbol, post_commit=False)

  def finish(self):
    """Finish the underlying mirror updater and forget the output stream."""

    MirrorUpdater.finish(self)
    del self.f
class GitRevisionMarkWriter(GitRevisionWriter):
  """A revision writer that refers to file contents by blob mark.

  File modifications are written as 'M <mode> :<mark> <path>' commands,
  where the mark is taken from the item's revision_reader_token.  When
  Ctx().revision_collector.blob_filename is None, the blob data is
  expected in the temporary file config.GIT_BLOB_DATAFILE and is copied
  to the output stream when the writer is started."""

  def register_artifacts(self, which_pass):
    """Register the blob temp file as needed, if that is where blobs are."""

    GitRevisionWriter.register_artifacts(self, which_pass)
    if Ctx().revision_collector.blob_filename is None:
      artifact_manager.register_temp_file_needed(
          config.GIT_BLOB_DATAFILE, which_pass,
          )

  def start(self, mirror, f):
    """Start writing to F, first copying any temporary blob data into it."""

    GitRevisionWriter.start(self, mirror, f)
    if Ctx().revision_collector.blob_filename is None:
      # The revision collector wrote the blobs to a temporary file;
      # copy them into f:
      logger.normal('Copying blob data to output')
      blobf = open(
          artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE), 'rb',
          )
      # Close the temporary file even if the copy fails part-way, so
      # that the handle is not leaked:
      try:
        shutil.copyfileobj(blobf, f)
      finally:
        blobf.close()

  def _modify_file(self, cvs_item, post_commit):
    """Write an 'M' command pointing at the blob mark for CVS_ITEM."""

    if cvs_item.cvs_file.executable:
      mode = '100755'
    else:
      mode = '100644'

    self.f.write(
        'M %s :%d %s\n'
        % (mode, cvs_item.revision_reader_token,
           cvs_item.cvs_file.cvs_path,)
        )
class GitRevisionInlineWriter(GitRevisionWriter):
  """A revision writer that embeds file contents in the output stream.

  File modifications are written as 'M <mode> inline <path>' commands
  followed by a 'data' section holding the full text obtained from the
  revision reader that was passed to the constructor."""

  def __init__(self, revision_reader):
    """REVISION_READER is the object used to fetch revision contents."""

    self.revision_reader = revision_reader

  def register_artifacts(self, which_pass):
    """Register the artifacts of this writer and of its revision reader."""

    GitRevisionWriter.register_artifacts(self, which_pass)
    self.revision_reader.register_artifacts(which_pass)

  def start(self, mirror, f):
    """Start writing to F and start the revision reader."""

    GitRevisionWriter.start(self, mirror, f)
    self.revision_reader.start()

  def _modify_file(self, cvs_item, post_commit):
    """Write an inline 'M' command and the full text for CVS_ITEM."""

    cvs_file = cvs_item.cvs_file
    mode = '100644'
    if cvs_file.executable:
      mode = '100755'

    out = self.f
    out.write('M %s inline %s\n' % (mode, cvs_file.cvs_path,))

    # A symbol carries no content of its own; read the content of the
    # revision that is its source instead:
    cvs_rev = cvs_item
    if isinstance(cvs_item, CVSSymbol):
      cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)

    # FIXME: We have to decide what to do about keyword substitution
    # and eol_style here:
    fulltext = self.revision_reader.get_content(cvs_rev)

    out.write('data %d\n' % (len(fulltext),))
    out.write(fulltext)
    out.write('\n')

  def finish(self):
    """Finish this writer and then the revision reader."""

    GitRevisionWriter.finish(self)
    self.revision_reader.finish()
class GitOutputOption(DVCSOutputOption):
  """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string or None) the name of the file to which
        the git-fast-import commands for defining revisions will be
        written.  If None, the data will be written to stdout.

    revision_writer -- the GitRevisionWriter used to write the
        per-file commands of each commit.

    author_transforms -- a map from CVS author names to git full name
        and email address.  See
        DVCSOutputOption.normalize_author_transforms() for information
        about the form of this parameter.

    tie_tag_fixup_branches -- (boolean) whether a tag fixup branch is
        pseudo-merged back into its source branch once the tag has
        been created.

  """

  name = "Git"

  # The first mark number used for git-fast-import commit marks.  This
  # value needs to be large to avoid conflicts with blob marks.
  _first_commit_mark = 1000000000

  def __init__(
        self, revision_writer,
        dump_filename=None,
        author_transforms=None,
        tie_tag_fixup_branches=False,
        ):
    """Constructor.

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)  If it is None, then
    the output is written to stdout.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
    fixup branch, it should be pseudo-merged (ancestry linked but no
    content changes) back into its source branch, to dispose of the
    open head.

    """

    DVCSOutputOption.__init__(self)
    self.dump_filename = dump_filename
    self.revision_writer = revision_writer

    self.author_transforms = self.normalize_author_transforms(
        author_transforms
        )

    self.tie_tag_fixup_branches = tie_tag_fixup_branches

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

  def register_artifacts(self, which_pass):
    """Register the artifacts of this option and of its revision writer."""

    DVCSOutputOption.register_artifacts(self, which_pass)
    self.revision_writer.register_artifacts(which_pass)

  def check_symbols(self, symbol_map):
    """Accept every symbol; no git-specific checks are implemented."""

    # FIXME: What constraints does git impose on symbols?
    pass

  def setup(self, svn_rev_count):
    """Open the output stream, reset mark tracking and start the writer."""

    DVCSOutputOption.setup(self, svn_rev_count)
    if self.dump_filename is None:
      self.f = sys.stdout
    else:
      self.f = open(self.dump_filename, 'wb')

    # The youngest revnum that has been committed so far:
    self._youngest = 0

    # A map {lod : [(revnum, mark)]} giving each of the revision
    # numbers in which there was a commit to lod, and the mark active
    # at the end of the revnum.
    self._marks = {}

    self.revision_writer.start(self._mirror, self.f)

  def _create_commit_mark(self, lod, revnum):
    """Generate a new mark, record it for LOD at REVNUM, and return it."""

    mark = self._mark_generator.gen_id()
    self._set_lod_mark(lod, revnum, mark)
    return mark

  def _set_lod_mark(self, lod, revnum, mark):
    """Record MARK as the status of LOD for REVNUM.

    If there is already an entry for REVNUM, overwrite it.  If not,
    append a new entry to the self._marks list for LOD."""

    assert revnum >= self._youngest
    entry = (revnum, mark)
    modifications = self._marks.setdefault(lod, [])
    if modifications and modifications[-1][0] == revnum:
      modifications[-1] = entry
    else:
      modifications.append(entry)
    self._youngest = revnum

  def _get_author(self, svn_commit):
    """Return the author to be used for SVN_COMMIT.

    Return the author as a UTF-8 string in the form needed by git
    fast-import; that is, 'name <email>'."""

    cvs_author = svn_commit.get_author()
    return self._map_author(cvs_author)

  def _map_author(self, cvs_author):
    """Return the transformed author, or 'CVS_AUTHOR <>' if unmapped."""

    return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

  @staticmethod
  def _get_log_msg(svn_commit):
    """Return the log message to be written for SVN_COMMIT."""

    return svn_commit.get_log_msg()

  def _write_commit_header(self, mark, author, date, log_msg):
    """Write the mark, committer and log message lines of a commit.

    The 'commit <ref>' line itself is written by the caller, because
    callers differ in when they write it relative to creating the
    mark."""

    self.f.write('mark :%d\n' % (mark,))
    self.f.write('committer %s %d +0000\n' % (author, date,))
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))

  def process_initial_project_commit(self, svn_commit):
    """Record an empty commit in the mirror; nothing is written."""

    self._mirror.start_commit(svn_commit.revnum)
    self._mirror.end_commit()

  def process_primary_commit(self, svn_commit):
    """Write a commit for SVN_COMMIT on the branch of the LOD it affects.

    Raise InternalError unless the commit affects exactly one LOD."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    lods = set()
    for cvs_rev in svn_commit.get_cvs_items():
      lods.add(cvs_rev.lod)
    if len(lods) != 1:
      raise InternalError('Commit affects %d LODs' % (len(lods),))
    lod = lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    if isinstance(lod, Trunk):
      # FIXME: is this correct?:
      self.f.write('commit refs/heads/master\n')
    else:
      self.f.write('commit refs/heads/%s\n' % (lod.name,))
    mark = self._create_commit_mark(lod, svn_commit.revnum)
    logger.normal(
        'Writing commit r%d on %s (mark :%d)'
        % (svn_commit.revnum, lod, mark,)
        )
    self._write_commit_header(mark, author, svn_commit.date, log_msg)
    for cvs_rev in svn_commit.get_cvs_items():
      self.revision_writer.process_revision(cvs_rev, post_commit=False)

    self.f.write('\n')
    self._mirror.end_commit()

  def process_post_commit(self, svn_commit):
    """Write a commit on master that merges SVN_COMMIT's source LOD.

    Raise InternalError unless the revisions of the commit all come
    from exactly one LOD."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    source_lods = set()
    for cvs_rev in svn_commit.cvs_revs:
      source_lods.add(cvs_rev.lod)
    if len(source_lods) != 1:
      raise InternalError('Commit is from %d LODs' % (len(source_lods),))
    source_lod = source_lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    # FIXME: is this correct?:
    self.f.write('commit refs/heads/master\n')
    # NOTE(review): the mark is recorded under the key None, whereas
    # process_primary_commit() records trunk marks under the Trunk
    # object itself -- confirm that this is intended.
    mark = self._create_commit_mark(None, svn_commit.revnum)
    logger.normal(
        'Writing post-commit r%d on Trunk (mark :%d)'
        % (svn_commit.revnum, mark,)
        )
    self._write_commit_header(mark, author, svn_commit.date, log_msg)
    self.f.write(
        'merge :%d\n'
        % (self._get_source_mark(source_lod, svn_commit.revnum),)
        )
    for cvs_rev in svn_commit.cvs_revs:
      self.revision_writer.process_revision(cvs_rev, post_commit=True)

    self.f.write('\n')
    self._mirror.end_commit()

  def _get_source_mark(self, source_lod, revnum):
    """Return the mark active on SOURCE_LOD at the end of REVNUM.

    Raise KeyError if no mark has ever been recorded for SOURCE_LOD,
    and InternalError if its earliest mark is younger than REVNUM."""

    modifications = self._marks[source_lod]
    # (revnum + 1,) sorts before every (revnum + 1, mark) entry, so the
    # entry preceding the insertion point is the last one whose revnum
    # is <= REVNUM:
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    if i < 0:
      # Without this check the index -1 would silently select the
      # *newest* mark of the LOD.
      raise InternalError(
          'No mark recorded for %s at or before r%d' % (source_lod, revnum,)
          )
    return modifications[i][1]

  def describe_lod_to_user(self, lod):
    """This needs to make sense to users of the fastimported result."""

    if isinstance(lod, Trunk):
      return 'master'
    else:
      return lod.name

  def _describe_commit(self, svn_commit, lod):
    """Return a one-line description of SVN_COMMIT on LOD.

    The description consists of the user-visible LOD name, the commit
    date in UTC, the mapped author (without an empty ' <>' email part)
    and the first line of the log message in single quotes."""

    author = self._map_author(svn_commit.get_author())
    if author.endswith(" <>"):
      author = author[:-3]
    date = time.strftime(
        "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
        )
    first_line = svn_commit.get_log_msg().split('\n', 1)[0]
    return "%s %s %s '%s'" % (
        self.describe_lod_to_user(lod), date, author, first_line,)

  def _process_symbol_commit(self, svn_commit, git_branch, source_groups):
    """Write a commit on GIT_BRANCH that fills svn_commit.symbol.

    SOURCE_GROUPS is a sequence of (revnum, lod, cvs_symbols) tuples;
    the first one is treated as the primary parent.  Return the mark
    of the commit that was written.  Raise InternalError if the
    primary source does not exist in the mirror."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    # There are two distinct cases we need to care for here:
    #  1. initial creation of a LOD
    #  2. fixup of an existing LOD to include more files, because the LOD in
    #     CVS was created piecemeal over time, with intervening commits

    # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
    # might be technically more correct (though _get_lod_history is currently
    # underscore-private)
    is_initial_lod_creation = svn_commit.symbol not in self._marks

    # Create the mark, only after the check above
    mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)

    if is_initial_lod_creation:
      # Get the primary parent
      p_source_revnum, p_source_lod, p_cvs_symbols = source_groups[0]
      try:
        p_source_node = self._mirror.get_old_lod_directory(
            p_source_lod, p_source_revnum
            )
      except KeyError:
        raise InternalError('Source %r does not exist' % (p_source_lod,))
      # Everything in the primary parent that is not part of the
      # symbol has to be deleted again after the 'from':
      cvs_files_to_delete = set(self._get_all_files(p_source_node))

      for (source_revnum, source_lod, cvs_symbols,) in source_groups:
        for cvs_symbol in cvs_symbols:
          cvs_files_to_delete.discard(cvs_symbol.cvs_file)

    self.f.write('commit %s\n' % (git_branch,))
    self._write_commit_header(mark, author, svn_commit.date, log_msg)

    # Only record actual DVCS ancestry for the primary sprout parent,
    # all the rest are effectively cherrypicks.
    if is_initial_lod_creation:
      self.f.write(
          'from :%d\n'
          % (self._get_source_mark(p_source_lod, p_source_revnum),)
          )

    for (source_revnum, source_lod, cvs_symbols,) in source_groups:
      for cvs_symbol in cvs_symbols:
        self.revision_writer.branch_file(cvs_symbol)

    if is_initial_lod_creation:
      # Sort the paths so that the order of the 'D' lines does not
      # depend on set iteration order:
      paths_to_delete = [
          cvs_file.cvs_path for cvs_file in cvs_files_to_delete
          ]
      paths_to_delete.sort()
      for cvs_path in paths_to_delete:
        self.f.write('D %s\n' % (cvs_path,))

    self.f.write('\n')
    return mark

  def _copy_symbol_from_source(self, svn_commit, source_groups):
    """Create svn_commit.symbol as a plain copy of its only source.

    No commit is written: the symbol's ref is reset to the mark that
    was active on the source, the LOD is copied in the mirror, and the
    same mark is recorded for the symbol."""

    (source_revnum, source_lod, cvs_symbols) = source_groups[0]
    logger.debug(
        '%s will be created via a simple copy from %s:r%d'
        % (svn_commit.symbol, source_lod, source_revnum,)
        )
    mark = self._get_source_mark(source_lod, source_revnum)
    self._set_symbol(svn_commit.symbol, mark)
    self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
    self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)

  def process_branch_commit(self, svn_commit):
    """Create or fill the branch svn_commit.symbol.

    A simple copy is written as a 'reset'; anything else is written as
    a fixup commit on the branch itself."""

    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      self._copy_symbol_from_source(svn_commit, source_groups)
    else:
      logger.debug(
          '%s will be created via fixup commit(s)' % (svn_commit.symbol,)
          )
      self._process_symbol_commit(
          svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
          source_groups,
          )

    self._mirror.end_commit()

  def _set_symbol(self, symbol, mark):
    """Write a 'reset' that points the ref of SYMBOL at MARK.

    Branches live under refs/heads and tags under refs/tags.  Raise
    InternalError if SYMBOL is neither a Branch nor a Tag."""

    if isinstance(symbol, Branch):
      category = 'heads'
    elif isinstance(symbol, Tag):
      category = 'tags'
    else:
      raise InternalError(
          'Symbol %r is neither a Branch nor a Tag' % (symbol,)
          )
    self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
    self.f.write('from :%d\n' % (mark,))

  def get_tag_fixup_branch_name(self, svn_commit):
    """Return the ref used as the scratch branch for tag fixups."""

    # The branch name to use for the "tag fixup branches".  The
    # git-fast-import documentation suggests using 'TAG_FIXUP'
    # (outside of the refs/heads namespace), but this is currently
    # broken.  Use a name containing '.', which is not allowed in CVS
    # symbols, to avoid conflicts (though of course a conflict could
    # still result if the user requests symbol transformations).
    return 'refs/heads/TAG.FIXUP'

  def _tie_fixup_branch_to_source(self, svn_commit, source_lod, fixup_mark):
    """Write a commit on SOURCE_LOD's branch that merges FIXUP_MARK.

    The commit carries no file changes; it only links the ancestry of
    the tag fixup commit back into its source branch."""

    source_lod_git_branch = \
        'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

    mark = self._create_commit_mark(source_lod, svn_commit.revnum)
    author = self._map_author(Ctx().username)
    log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

    self.f.write('commit %s\n' % (source_lod_git_branch,))
    self._write_commit_header(mark, author, svn_commit.date, log_msg)
    self.f.write('merge :%d\n' % (fixup_mark,))
    self.f.write('\n')

  def process_tag_commit(self, svn_commit):
    """Create the tag svn_commit.symbol.

    A simple copy is written as a 'reset'.  Otherwise the content of
    the tag is assembled on the fixup branch named by
    get_tag_fixup_branch_name(), the tag is pointed at the resulting
    commit, and a 'reset' is written for the fixup branch.  If
    tie_tag_fixup_branches is set, the fixup commit is then
    pseudo-merged into the branch of the first source group."""

    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      self._copy_symbol_from_source(svn_commit, source_groups)
    else:
      logger.debug(
          '%s will be created via a fixup branch' % (svn_commit.symbol,)
          )

      fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

      # Create the fixup branch (which might involve making more than
      # one commit):
      mark = self._process_symbol_commit(
          svn_commit, fixup_branch_name, source_groups
          )

      # Store the mark of the last commit to the fixup branch as the
      # value of the tag:
      self._set_symbol(svn_commit.symbol, mark)
      self.f.write('reset %s\n' % (fixup_branch_name,))
      self.f.write('\n')

      if self.tie_tag_fixup_branches:
        self._tie_fixup_branch_to_source(
            svn_commit, source_groups[0][1], mark
            )

    self._mirror.end_commit()

  def _get_log_msg_for_ancestry_tie(self, svn_commit):
    """Return the wrapped log message for an ancestry-tie commit."""

    return Ctx().text_wrapper.fill(
        Ctx().tie_tag_ancestry_message % {
            'symbol_name' : svn_commit.symbol.name,
            }
        )

  def cleanup(self):
    """Finish the revision writer and release the output stream.

    The dump file (if one was opened) is closed even when finishing
    fails, so that the file handle is not leaked."""

    try:
      DVCSOutputOption.cleanup(self)
      self.revision_writer.finish()
    finally:
      if self.dump_filename is not None:
        self.f.close()
      del self.f