Remove some unused variable definitions.
[cvs2svn.git] / cvs2svn_lib / git_output_option.py
blobd7afa4b871f4f5bb598143d39238f863f23e0232
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2007-2009 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""Classes for outputting the converted repository to git.

For information about the format allowed by git-fast-import, see:

    http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html

"""
25 import bisect
26 import time
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.log import logger
30 from cvs2svn_lib.context import Ctx
31 from cvs2svn_lib.symbol import Trunk
32 from cvs2svn_lib.symbol import Branch
33 from cvs2svn_lib.symbol import Tag
34 from cvs2svn_lib.cvs_item import CVSSymbol
35 from cvs2svn_lib.dvcs_common import DVCSOutputOption
36 from cvs2svn_lib.dvcs_common import MirrorUpdater
37 from cvs2svn_lib.key_generator import KeyGenerator
class GitRevisionWriter(MirrorUpdater):
  """Base class that writes per-file changes to a git-fast-import stream.

  Each mirror-update hook first delegates to MirrorUpdater and then
  emits the corresponding fast-import file command to the stream that
  was passed to start().  Subclasses must implement _modify_file(),
  which emits the 'M' (file modify) command for one item."""

  def start(self, mirror, f):
    """Start the underlying MirrorUpdater on MIRROR and remember F.

    F is the writable file-like object that receives the fast-import
    commands until finish() is called."""

    super(GitRevisionWriter, self).start(mirror)
    self.f = f

  def _modify_file(self, cvs_item, post_commit):
    """Write the fast-import command that sets the content of CVS_ITEM.

    Must be overridden by subclasses."""

    raise NotImplementedError()

  def add_file(self, cvs_rev, post_commit):
    """Record the addition of CVS_REV and emit its content command."""

    super(GitRevisionWriter, self).add_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def modify_file(self, cvs_rev, post_commit):
    """Record the modification of CVS_REV and emit its content command."""

    super(GitRevisionWriter, self).modify_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def delete_file(self, cvs_rev, post_commit):
    """Record the deletion of CVS_REV and emit a 'D' command for its path."""

    super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit)
    self.f.write('D %s\n' % (cvs_rev.cvs_file.cvs_path,))

  def branch_file(self, cvs_symbol):
    """Record the branching of CVS_SYMBOL and emit its content command.

    The content command is always written with post_commit=False."""

    super(GitRevisionWriter, self).branch_file(cvs_symbol)
    self._modify_file(cvs_symbol, post_commit=False)

  def finish(self):
    """Finish the underlying MirrorUpdater and forget the output stream.

    The stream itself is not closed here; only the reference to it is
    dropped."""

    super(GitRevisionWriter, self).finish()
    del self.f
class GitRevisionMarkWriter(GitRevisionWriter):
  """A GitRevisionWriter that refers to file contents by mark number.

  No file content is written by this class; each 'M' command names
  the mark stored in the item's revision_reader_token attribute."""

  def _modify_file(self, cvs_item, post_commit):
    """Write an 'M <mode> :<mark> <path>' command for CVS_ITEM.

    POST_COMMIT is accepted for interface compatibility and is not
    used."""

    # Git file modes: executable vs. regular (non-executable) file.
    if cvs_item.cvs_file.executable:
      mode = '100755'
    else:
      mode = '100644'

    self.f.write(
        'M %s :%d %s\n'
        % (mode, cvs_item.revision_reader_token,
           cvs_item.cvs_file.cvs_path,)
        )
class GitRevisionInlineWriter(GitRevisionWriter):
  """A GitRevisionWriter that writes file contents inline.

  The content of each file is obtained from the revision reader passed
  to the constructor and written into the stream immediately after
  the 'M ... inline' command."""

  def __init__(self, revision_reader):
    """REVISION_READER is the object used to fetch revision contents.

    It must provide register_artifacts(), start(), get_content() and
    finish(), which are the methods called by this class."""

    self.revision_reader = revision_reader

  def register_artifacts(self, which_pass):
    """Register artifacts for this writer and for its revision reader."""

    GitRevisionWriter.register_artifacts(self, which_pass)
    self.revision_reader.register_artifacts(which_pass)

  def start(self, mirror, f):
    """Start writing to F and start the revision reader."""

    GitRevisionWriter.start(self, mirror, f)
    self.revision_reader.start()

  def _modify_file(self, cvs_item, post_commit):
    """Write an inline 'M' command followed by the content of CVS_ITEM.

    If CVS_ITEM is a CVSSymbol, the content is taken from the
    CVSRevision that is the source of the symbol.  POST_COMMIT is
    accepted for interface compatibility and is not used."""

    # Git file modes: executable vs. regular (non-executable) file.
    if cvs_item.cvs_file.executable:
      mode = '100755'
    else:
      mode = '100644'

    self.f.write(
        'M %s inline %s\n'
        % (mode, cvs_item.cvs_file.cvs_path,)
        )

    if isinstance(cvs_item, CVSSymbol):
      cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)
    else:
      cvs_rev = cvs_item

    # FIXME: We have to decide what to do about keyword substitution
    # and eol_style here:
    fulltext = self.revision_reader.get_content(cvs_rev)

    # The 'data' header announces the exact length of the payload that
    # follows; the trailing newline is written after the payload.
    self.f.write('data %d\n' % (len(fulltext),))
    self.f.write(fulltext)
    self.f.write('\n')

  def finish(self):
    """Finish this writer and then the revision reader."""

    GitRevisionWriter.finish(self)
    self.revision_reader.finish()
class GitOutputOption(DVCSOutputOption):
  """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string) the name of the file to which the
        git-fast-import commands for defining revisions will be
        written.

    revision_writer -- the GitRevisionWriter used to write the file
        changes that belong to each commit.

    author_transforms -- a map from CVS author names to git full name
        and email address. See
        DVCSOutputOption.normalize_author_transforms() for information
        about the form of this parameter.

    tie_tag_fixup_branches -- (boolean) whether a tag fixup branch is
        pseudo-merged back into its source branch after the tag has
        been created.

  """

  name = "Git"

  # The first mark number used for git-fast-import commit marks. This
  # value needs to be large to avoid conflicts with blob marks.
  _first_commit_mark = 1000000000

  def __init__(
        self, dump_filename, revision_writer,
        author_transforms=None,
        tie_tag_fixup_branches=False,
        ):
    """Constructor.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written. (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address. All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
    fixup branch, it should be pseudo-merged (ancestry linked but no
    content changes) back into its source branch, to dispose of the
    open head.

    """

    DVCSOutputOption.__init__(self)
    self.dump_filename = dump_filename
    self.revision_writer = revision_writer

    self.author_transforms = self.normalize_author_transforms(
        author_transforms
        )

    self.tie_tag_fixup_branches = tie_tag_fixup_branches

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

  def register_artifacts(self, which_pass):
    """Register artifacts for this option and for its revision writer."""

    DVCSOutputOption.register_artifacts(self, which_pass)
    self.revision_writer.register_artifacts(which_pass)

  def check_symbols(self, symbol_map):
    """Accept all symbols; no git-specific checks are implemented."""

    # FIXME: What constraints does git impose on symbols?
    pass

  def setup(self, svn_rev_count):
    """Open the dump file, reset the mark bookkeeping, start the writer."""

    DVCSOutputOption.setup(self, svn_rev_count)
    self.f = open(self.dump_filename, 'wb')

    # The youngest revnum that has been committed so far:
    self._youngest = 0

    # A map {lod : [(revnum, mark)]} giving each of the revision
    # numbers in which there was a commit to lod, and the mark active
    # at the end of the revnum.
    self._marks = {}

    self.revision_writer.start(self._mirror, self.f)

  def _create_commit_mark(self, lod, revnum):
    """Generate a new mark, record it for LOD at REVNUM, and return it."""

    mark = self._mark_generator.gen_id()
    self._set_lod_mark(lod, revnum, mark)
    return mark

  def _set_lod_mark(self, lod, revnum, mark):
    """Record MARK as the status of LOD for REVNUM.

    If there is already an entry for REVNUM, overwrite it. If not,
    append a new entry to the self._marks list for LOD."""

    # Entries must be recorded in non-decreasing revnum order so that
    # each per-LOD list stays sorted for the bisect in
    # _get_source_mark().
    assert revnum >= self._youngest
    entry = (revnum, mark)
    try:
      modifications = self._marks[lod]
    except KeyError:
      # This LOD hasn't appeared before; create a new list and add the
      # entry:
      self._marks[lod] = [entry]
    else:
      # A record exists, so it necessarily has at least one element:
      if modifications[-1][0] == revnum:
        modifications[-1] = entry
      else:
        modifications.append(entry)
    self._youngest = revnum

  def _get_author(self, svn_commit):
    """Return the author to be used for SVN_COMMIT.

    Return the author as a UTF-8 string in the form needed by git
    fast-import; that is, 'name <email>'."""

    cvs_author = svn_commit.get_author()
    return self._map_author(cvs_author)

  def _map_author(self, cvs_author):
    """Return the transformed form of CVS_AUTHOR.

    Authors without an entry in self.author_transforms are rendered
    as 'CVS_AUTHOR <>'."""

    return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

  @staticmethod
  def _get_log_msg(svn_commit):
    """Return the log message of SVN_COMMIT."""

    return svn_commit.get_log_msg()

  def process_initial_project_commit(self, svn_commit):
    """Advance the mirror past SVN_COMMIT without writing any output."""

    self._mirror.start_commit(svn_commit.revnum)
    self._mirror.end_commit()

  def process_primary_commit(self, svn_commit):
    """Write SVN_COMMIT as a commit on the single LOD that it affects.

    Raise InternalError if the commit does not affect exactly one
    LOD."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    lods = set()
    for cvs_rev in svn_commit.get_cvs_items():
      lods.add(cvs_rev.lod)
    if len(lods) != 1:
      raise InternalError('Commit affects %d LODs' % (len(lods),))
    lod = lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    if isinstance(lod, Trunk):
      # FIXME: is this correct?:
      self.f.write('commit refs/heads/master\n')
    else:
      self.f.write('commit refs/heads/%s\n' % (lod.name,))
    self.f.write(
        'mark :%d\n'
        % (self._create_commit_mark(lod, svn_commit.revnum),)
        )
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    for cvs_rev in svn_commit.get_cvs_items():
      self.revision_writer.process_revision(cvs_rev, post_commit=False)

    self.f.write('\n')
    self._mirror.end_commit()

  def process_post_commit(self, svn_commit):
    """Write SVN_COMMIT as a commit on master merged from its source LOD.

    Raise InternalError if the revisions in the commit do not all come
    from exactly one LOD."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    source_lods = set()
    for cvs_rev in svn_commit.cvs_revs:
      source_lods.add(cvs_rev.lod)
    if len(source_lods) != 1:
      raise InternalError('Commit is from %d LODs' % (len(source_lods),))
    source_lod = source_lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    # FIXME: is this correct?:
    self.f.write('commit refs/heads/master\n')
    # NOTE(review): the mark is recorded under the key None rather
    # than under a LOD object, so it cannot be found by looking up a
    # trunk LOD in self._marks -- confirm that this is intended.
    self.f.write(
        'mark :%d\n'
        % (self._create_commit_mark(None, svn_commit.revnum),)
        )
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    self.f.write(
        'merge :%d\n'
        % (self._get_source_mark(source_lod, svn_commit.revnum),)
        )
    for cvs_rev in svn_commit.cvs_revs:
      self.revision_writer.process_revision(cvs_rev, post_commit=True)

    self.f.write('\n')
    self._mirror.end_commit()

  def _get_source_mark(self, source_lod, revnum):
    """Return the mark active on SOURCE_LOD at the end of REVNUM.

    Raise KeyError if no mark was ever recorded for SOURCE_LOD, and
    InternalError if every mark recorded for it is younger than
    REVNUM."""

    modifications = self._marks[source_lod]
    # (revnum + 1,) sorts before every (revnum + 1, mark) entry, so
    # bisect_left() yields the index of the first entry that is
    # younger than REVNUM; the entry before it is the one wanted.
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    if i < 0:
      # Without this check the index -1 would silently select the
      # newest mark, which is the wrong ancestor.
      raise InternalError(
          'No mark recorded for %r at or before r%d' % (source_lod, revnum,)
          )
    return modifications[i][1]

  def describe_lod_to_user(self, lod):
    """This needs to make sense to users of the fastimported result."""
    if isinstance(lod, Trunk):
      return 'master'
    else:
      return lod.name

  def _describe_commit(self, svn_commit, lod):
    """Return a one-line description of SVN_COMMIT on LOD.

    The description consists of the user-visible LOD name, the commit
    date in UTC, the author (without an empty ' <>' email suffix) and
    the first line of the log message in single quotes."""

    author = self._map_author(svn_commit.get_author())
    if author.endswith(" <>"):
      author = author[:-3]
    date = time.strftime(
        "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
        )
    # Keep only the first line of the log message:
    log_msg = svn_commit.get_log_msg().split('\n', 1)[0]
    return "%s %s %s '%s'" % (
        self.describe_lod_to_user(lod), date, author, log_msg,)

  def _process_symbol_commit(self, svn_commit, git_branch, source_groups):
    """Write a commit on GIT_BRANCH that creates or fixes up a symbol.

    SOURCE_GROUPS is a sequence of (source_revnum, source_lod,
    cvs_symbols) tuples. Return the mark of the commit written."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    # There are two distinct cases we need to care for here:
    #  1. initial creation of a LOD
    #  2. fixup of an existing LOD to include more files, because the LOD in
    #     CVS was created piecemeal over time, with intervening commits

    # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
    # might be technically more correct (though _get_lod_history is currently
    # underscore-private)
    is_initial_lod_creation = svn_commit.symbol not in self._marks

    # Create the mark, only after the check above
    mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)

    if is_initial_lod_creation:
      # Get the primary parent
      (p_source_revnum, p_source_lod, _p_cvs_symbols) = source_groups[0]
      try:
        p_source_node = self._mirror.get_old_lod_directory(
            p_source_lod, p_source_revnum
            )
      except KeyError:
        raise InternalError('Source %r does not exist' % (p_source_lod,))
      cvs_files_to_delete = set(self._get_all_files(p_source_node))

      # Files that are part of the symbol must not be deleted:
      for (_revnum, _lod, cvs_symbols,) in source_groups:
        for cvs_symbol in cvs_symbols:
          cvs_files_to_delete.discard(cvs_symbol.cvs_file)

    # Write a trailer to the log message which describes the cherrypicks that
    # make up this symbol creation.
    trailer = ["\n"]
    if is_initial_lod_creation:
      trailer.append(
          "\nSprout from %s" % (
              self._describe_commit(
                  Ctx()._persistence_manager.get_svn_commit(p_source_revnum),
                  p_source_lod
                  ),
              )
          )
    # On initial creation the first group is the sprout parent, which
    # has already been described above, so skip it here.
    first_cherrypick = int(is_initial_lod_creation)
    for (source_revnum, source_lod, cvs_symbols,) \
            in source_groups[first_cherrypick:]:
      trailer.append(
          "\nCherrypick from %s:" % (
              self._describe_commit(
                  Ctx()._persistence_manager.get_svn_commit(source_revnum),
                  source_lod
                  ),
              )
          )
      for cvs_path in sorted(
            cvs_symbol.cvs_file.cvs_path for cvs_symbol in cvs_symbols
            ):
        trailer.append("\n %s" % (cvs_path,))
    if is_initial_lod_creation:
      if cvs_files_to_delete:
        trailer.append("\nDelete:")
        for cvs_path in sorted(
              cvs_file.cvs_path for cvs_file in cvs_files_to_delete
              ):
          trailer.append("\n %s" % (cvs_path,))
    log_msg += "".join(trailer)

    self.f.write('commit %s\n' % (git_branch,))
    self.f.write('mark :%d\n' % (mark,))
    self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))

    # Only record actual DVCS ancestry for the primary sprout parent,
    # all the rest are effectively cherrypicks.
    if is_initial_lod_creation:
      self.f.write(
          'from :%d\n'
          % (self._get_source_mark(p_source_lod, p_source_revnum),)
          )

    for (_revnum, _lod, cvs_symbols,) in source_groups:
      for cvs_symbol in cvs_symbols:
        self.revision_writer.branch_file(cvs_symbol)

    if is_initial_lod_creation:
      for cvs_file in cvs_files_to_delete:
        self.f.write('D %s\n' % (cvs_file.cvs_path,))

    self.f.write('\n')
    return mark

  def process_branch_commit(self, svn_commit):
    """Create or fix up the branch that is the symbol of SVN_COMMIT.

    If the branch can be made by a simple copy, only a 'reset' to the
    source mark is written; otherwise a fixup commit is written on
    the branch."""

    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      (source_revnum, source_lod, cvs_symbols) = source_groups[0]
      logger.debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      logger.debug(
          '%s will be created via fixup commit(s)' % (svn_commit.symbol,)
          )
      self._process_symbol_commit(
          svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
          source_groups,
          )

    self._mirror.end_commit()

  def _set_symbol(self, symbol, mark):
    """Write a 'reset' that points the git ref for SYMBOL at MARK.

    Branches are written under refs/heads and tags under refs/tags.
    Raise InternalError if SYMBOL is neither a Branch nor a Tag."""

    if isinstance(symbol, Branch):
      category = 'heads'
    elif isinstance(symbol, Tag):
      category = 'tags'
    else:
      raise InternalError(
          'Symbol %r is neither a Branch nor a Tag' % (symbol,)
          )
    self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
    self.f.write('from :%d\n' % (mark,))

  def get_tag_fixup_branch_name(self, svn_commit):
    """Return the git ref used as the temporary tag fixup branch."""

    # The branch name to use for the "tag fixup branches". The
    # git-fast-import documentation suggests using 'TAG_FIXUP'
    # (outside of the refs/heads namespace), but this is currently
    # broken. Use a name containing '.', which is not allowed in CVS
    # symbols, to avoid conflicts (though of course a conflict could
    # still result if the user requests symbol transformations).
    return 'refs/heads/TAG.FIXUP'

  def process_tag_commit(self, svn_commit):
    """Create the tag that is the symbol of SVN_COMMIT.

    If the tag can be made by a simple copy, only a 'reset' to the
    source mark is written. Otherwise the tag content is committed to
    a fixup branch, the tag is pointed at that commit and, if
    self.tie_tag_fixup_branches is set, a commit that merges the fixup
    commit is written on the source branch."""

    # FIXME: For now we create a fixup branch with the same name as
    # the tag, then the tag. We never delete the fixup branch.
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      (source_revnum, source_lod, cvs_symbols) = source_groups[0]
      logger.debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      logger.debug(
          '%s will be created via a fixup branch' % (svn_commit.symbol,)
          )

      fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

      # Create the fixup branch (which might involve making more than
      # one commit):
      mark = self._process_symbol_commit(
          svn_commit, fixup_branch_name, source_groups
          )

      # Store the mark of the last commit to the fixup branch as the
      # value of the tag:
      self._set_symbol(svn_commit.symbol, mark)
      self.f.write('reset %s\n' % (fixup_branch_name,))
      self.f.write('\n')

      if self.tie_tag_fixup_branches:
        source_lod = source_groups[0][1]
        # A source LOD without a 'name' attribute is written to
        # 'master':
        source_lod_git_branch = \
            'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

        mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
        author = self._map_author(Ctx().username)
        log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

        self.f.write('commit %s\n' % (source_lod_git_branch,))
        self.f.write('mark :%d\n' % (mark2,))
        self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

        self.f.write(
            'merge :%d\n'
            % (mark,)
            )

        self.f.write('\n')

    self._mirror.end_commit()

  def _get_log_msg_for_ancestry_tie(self, svn_commit):
    """Return the wrapped log message for an ancestry-tying commit.

    The message is Ctx().tie_tag_ancestry_message with 'symbol_name'
    substituted, filled by Ctx().text_wrapper."""

    return Ctx().text_wrapper.fill(
        Ctx().tie_tag_ancestry_message % {
            'symbol_name' : svn_commit.symbol.name,
            }
        )

  def cleanup(self):
    """Finish the revision writer and close the dump file.

    The dump file is closed even if an earlier cleanup step raises."""

    try:
      DVCSOutputOption.cleanup(self)
      self.revision_writer.finish()
    finally:
      self.f.close()
      del self.f