sys.modules['bsddb3'] is already in the local namespace.
[cvs2svn.git] / cvs2svn_lib / git_output_option.py
blob01184323d3e90ee5d707fe8637640342596cb4cb
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes for outputting the converted repository to git.
19 For information about the format allowed by git-fast-import, see:
21 http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
23 """
25 import bisect
26 import time
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.log import logger
30 from cvs2svn_lib.context import Ctx
31 from cvs2svn_lib.symbol import Trunk
32 from cvs2svn_lib.symbol import Branch
33 from cvs2svn_lib.symbol import Tag
34 from cvs2svn_lib.cvs_item import CVSSymbol
35 from cvs2svn_lib.dvcs_common import DVCSOutputOption
36 from cvs2svn_lib.dvcs_common import MirrorUpdater
37 from cvs2svn_lib.key_generator import KeyGenerator
class ExpectedDirectoryError(Exception):
  """Raised when a directory was expected but a file was found instead."""
class ExpectedFileError(Exception):
  """Raised when a file was expected but a directory was found instead."""
class GitRevisionWriter(MirrorUpdater):
  """A MirrorUpdater that also writes file commands to a fast-import stream.

  Each operation is first delegated to MirrorUpdater and then echoed
  to the file object that was passed to start().  How the content of
  an added or modified file is emitted is left to subclasses, which
  must implement _modify_file()."""

  def start(self, mirror, f):
    """Start processing using MIRROR, writing commands to file object F."""

    super(GitRevisionWriter, self).start(mirror)
    self.f = f

  def _modify_file(self, cvs_item, post_commit):
    """Write the command giving the new content of CVS_ITEM's file.

    Abstract; always raises NotImplementedError here."""

    raise NotImplementedError()

  def add_file(self, cvs_rev, post_commit):
    """Record the addition of CVS_REV, then emit its content command."""

    super(GitRevisionWriter, self).add_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def modify_file(self, cvs_rev, post_commit):
    """Record the modification of CVS_REV, then emit its content command."""

    super(GitRevisionWriter, self).modify_file(cvs_rev, post_commit)
    self._modify_file(cvs_rev, post_commit)

  def delete_file(self, cvs_rev, post_commit):
    """Record the deletion of CVS_REV and write a 'D <path>' command."""

    super(GitRevisionWriter, self).delete_file(cvs_rev, post_commit)
    deleted_path = cvs_rev.cvs_file.cvs_path
    self.f.write('D %s\n' % (deleted_path,))

  def branch_file(self, cvs_symbol):
    """Record the branching of CVS_SYMBOL, then emit its content command.

    The content command is always written with post_commit=False."""

    super(GitRevisionWriter, self).branch_file(cvs_symbol)
    self._modify_file(cvs_symbol, post_commit=False)

  def finish(self):
    """Finish processing and drop the reference to the output file.

    The file itself is not closed here."""

    super(GitRevisionWriter, self).finish()
    del self.f
class GitRevisionMarkWriter(GitRevisionWriter):
  """A GitRevisionWriter that refers to file contents by mark.

  The mark number written is taken from the item's
  revision_reader_token attribute."""

  def _modify_file(self, cvs_item, post_commit):
    """Write an 'M <mode> :<mark> <path>' command for CVS_ITEM."""

    cvs_file = cvs_item.cvs_file

    # Regular file unless the CVS file is flagged as executable:
    mode = '100644'
    if cvs_file.executable:
      mode = '100755'

    command = 'M %s :%d %s\n' % (
        mode, cvs_item.revision_reader_token, cvs_file.cvs_path,
        )
    self.f.write(command)
class GitRevisionInlineWriter(GitRevisionWriter):
  """A GitRevisionWriter that writes file contents inline.

  The text of each file is obtained from a revision reader and written
  directly into the fast-import stream, right after the 'M' command
  that refers to it."""

  def __init__(self, revision_reader):
    """REVISION_READER is the object used to fetch revision contents."""

    self.revision_reader = revision_reader

  def register_artifacts(self, which_pass):
    """Register the artifacts of the base class and of the reader."""

    GitRevisionWriter.register_artifacts(self, which_pass)
    self.revision_reader.register_artifacts(which_pass)

  def start(self, mirror, f):
    """Start the base writer on MIRROR and F, then start the reader."""

    GitRevisionWriter.start(self, mirror, f)
    self.revision_reader.start()

  def _modify_file(self, cvs_item, post_commit):
    """Write an 'M <mode> inline <path>' command followed by the data."""

    cvs_file = cvs_item.cvs_file

    # Regular file unless the CVS file is flagged as executable:
    mode = '100644'
    if cvs_file.executable:
      mode = '100755'

    self.f.write('M %s inline %s\n' % (mode, cvs_file.cvs_path,))

    # A symbol has no text of its own; use the revision it was made from:
    if not isinstance(cvs_item, CVSSymbol):
      cvs_rev = cvs_item
    else:
      cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)

    # FIXME: We have to decide what to do about keyword substitution
    # and eol_style here:
    fulltext = self.revision_reader.get_content(cvs_rev)

    # The 'data' header announces the length, then come the contents
    # and a terminating newline:
    for chunk in ('data %d\n' % (len(fulltext),), fulltext, '\n'):
      self.f.write(chunk)

  def finish(self):
    """Finish the base writer, then finish the reader."""

    GitRevisionWriter.finish(self)
    self.revision_reader.finish()
class GitOutputOption(DVCSOutputOption):
  """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string) the name of the file to which the
        git-fast-import commands for defining revisions will be
        written.

    revision_writer -- the GitRevisionWriter used to write the
        per-file commands of each commit.

    author_transforms -- a map from CVS author names to git full name
        and email address.  See
        DVCSOutputOption.normalize_author_transforms() for information
        about the form of this parameter.

    tie_tag_fixup_branches -- (boolean) whether an extra merge commit
        is written on the source branch after a tag fixup branch has
        been used.

  """

  name = "Git"

  # The first mark number used for git-fast-import commit marks.  This
  # value needs to be large to avoid conflicts with blob marks.
  _first_commit_mark = 1000000000

  def __init__(
        self, dump_filename, revision_writer,
        author_transforms=None,
        tie_tag_fixup_branches=False
        ):
    """Constructor.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
    fixup branch, it should be pseudo-merged (ancestry linked but no
    content changes) back into its source branch, to dispose of the
    open head.

    """

    DVCSOutputOption.__init__(self)
    self.dump_filename = dump_filename
    self.revision_writer = revision_writer

    self.author_transforms = self.normalize_author_transforms(
        author_transforms
        )

    self.tie_tag_fixup_branches = tie_tag_fixup_branches

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

  def register_artifacts(self, which_pass):
    """Register the artifacts of the base class and of the writer."""

    DVCSOutputOption.register_artifacts(self, which_pass)
    self.revision_writer.register_artifacts(which_pass)

  def check_symbols(self, symbol_map):
    """Accept every symbol in SYMBOL_MAP; nothing is checked yet."""

    # FIXME: What constraints does git impose on symbols?
    pass

  def setup(self, svn_rev_count):
    """Open the dump file, reset the bookkeeping and start the writer."""

    DVCSOutputOption.setup(self, svn_rev_count)
    self.f = open(self.dump_filename, 'wb')

    # The youngest revnum that has been committed so far:
    self._youngest = 0

    # A map {lod : [(revnum, mark)]} giving each of the revision
    # numbers in which there was a commit to lod, and the mark active
    # at the end of the revnum.
    self._marks = {}

    self.revision_writer.start(self._mirror, self.f)

  def _create_commit_mark(self, lod, revnum):
    """Generate a new mark, record it for LOD at REVNUM and return it."""

    mark = self._mark_generator.gen_id()
    self._set_lod_mark(lod, revnum, mark)
    return mark

  def _set_lod_mark(self, lod, revnum, mark):
    """Record MARK as the status of LOD for REVNUM.

    If there is already an entry for REVNUM, overwrite it.  If not,
    append a new entry to the self._marks list for LOD."""

    assert revnum >= self._youngest
    entry = (revnum, mark)

    # A list that already existed necessarily has at least one
    # element; an empty one means that LOD has not appeared before.
    modifications = self._marks.setdefault(lod, [])
    if modifications and modifications[-1][0] == revnum:
      modifications[-1] = entry
    else:
      modifications.append(entry)

    self._youngest = revnum

  def _get_author(self, svn_commit):
    """Return the author to be used for SVN_COMMIT.

    Return the author as a UTF-8 string in the form needed by git
    fast-import; that is, 'name <email>'."""

    cvs_author = svn_commit.get_author()
    return self._map_author(cvs_author)

  def _map_author(self, cvs_author):
    """Return the git identity for CVS_AUTHOR.

    Authors that have no entry in author_transforms are mapped to
    '<cvs_author> <>', i.e. the CVS name with an empty email."""

    return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

  @staticmethod
  def _get_log_msg(svn_commit):
    """Return the log message to be used for SVN_COMMIT."""

    return svn_commit.get_log_msg()

  def _write_commit_header(self, git_ref, mark, author, date, log_msg):
    """Write the lines that open every commit in the fast-import stream.

    These are the 'commit GIT_REF', 'mark', 'committer' (always with a
    +0000 timezone) and 'data' lines, followed by LOG_MSG itself."""

    self.f.write('commit %s\n' % (git_ref,))
    self.f.write('mark :%d\n' % (mark,))
    self.f.write('committer %s %d +0000\n' % (author, date,))
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))

  def process_initial_project_commit(self, svn_commit):
    """Record SVN_COMMIT in the mirror; nothing is written to the dump."""

    self._mirror.start_commit(svn_commit.revnum)
    self._mirror.end_commit()

  def process_primary_commit(self, svn_commit):
    """Write SVN_COMMIT as one commit on the branch of its single LOD.

    Raise InternalError if the commit does not affect exactly one
    LOD."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    lods = set(cvs_rev.lod for cvs_rev in svn_commit.get_cvs_items())
    if len(lods) != 1:
      raise InternalError('Commit affects %d LODs' % (len(lods),))
    lod = lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    if isinstance(lod, Trunk):
      # FIXME: is this correct?:
      git_ref = 'refs/heads/master'
    else:
      git_ref = 'refs/heads/%s' % (lod.name,)
    mark = self._create_commit_mark(lod, svn_commit.revnum)
    self._write_commit_header(
        git_ref, mark, author, svn_commit.date, log_msg
        )

    for cvs_rev in svn_commit.get_cvs_items():
      self.revision_writer.process_revision(cvs_rev, post_commit=False)

    self.f.write('\n')
    self._mirror.end_commit()

  def process_post_commit(self, svn_commit):
    """Write SVN_COMMIT as a commit on master that merges its source LOD.

    Raise InternalError if the revisions of the commit do not all come
    from exactly one LOD."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    source_lods = set(cvs_rev.lod for cvs_rev in svn_commit.cvs_revs)
    if len(source_lods) != 1:
      raise InternalError('Commit is from %d LODs' % (len(source_lods),))
    source_lod = source_lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    # NOTE(review): the mark is recorded under the key None and not
    # under the trunk LOD -- confirm that this is intended.
    mark = self._create_commit_mark(None, svn_commit.revnum)
    # FIXME: is this correct?:
    self._write_commit_header(
        'refs/heads/master', mark, author, svn_commit.date, log_msg
        )
    self.f.write(
        'merge :%d\n'
        % (self._get_source_mark(source_lod, svn_commit.revnum),)
        )

    for cvs_rev in svn_commit.cvs_revs:
      self.revision_writer.process_revision(cvs_rev, post_commit=True)

    self.f.write('\n')
    self._mirror.end_commit()

  def _get_source_mark(self, source_lod, revnum):
    """Return the mark active on SOURCE_LOD at the end of REVNUM.

    Raise KeyError if no mark was ever recorded for SOURCE_LOD."""

    modifications = self._marks[source_lod]

    # (revnum + 1,) sorts before every (revnum + 1, mark) entry, so the
    # entry just before the insertion point is the last one whose
    # revnum is <= REVNUM.
    #
    # NOTE(review): if every entry is younger than REVNUM, i is -1 and
    # the most recent entry is returned instead.
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    return modifications[i][1]

  def describe_lod_to_user(self, lod):
    """This needs to make sense to users of the fastimported result."""

    if isinstance(lod, Trunk):
      return 'master'
    else:
      return lod.name

  def _describe_commit(self, svn_commit, lod):
    """Return a one-line description of SVN_COMMIT as seen on LOD.

    The description consists of the user-visible name of LOD, the
    commit date in UTC, the author (without an empty email part) and
    the first line of the log message."""

    author = self._map_author(svn_commit.get_author())
    if author.endswith(" <>"):
      author = author[:-3]
    date = time.strftime(
        "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
        )
    # Only the first line of the log message is used:
    log_msg = svn_commit.get_log_msg().split('\n', 1)[0]
    return "%s %s %s '%s'" % (
        self.describe_lod_to_user(lod), date, author, log_msg,)

  def _process_symbol_commit(self, svn_commit, git_branch, source_groups):
    """Write a commit on GIT_BRANCH that creates or fixes up a symbol.

    SOURCE_GROUPS is a sequence of (revnum, lod, cvs_symbols) tuples
    naming the sources that the symbol's files are taken from.  Return
    the mark of the commit that was written.

    Raise InternalError if the symbol is being created and the first
    source group does not exist in the mirror."""

    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    # There are two distinct cases we need to care for here:
    #  1. initial creation of a LOD
    #  2. fixup of an existing LOD to include more files, because the LOD in
    #     CVS was created piecemeal over time, with intervening commits

    # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
    # might be technically more correct (though _get_lod_history is currently
    # underscore-private)
    is_initial_lod_creation = svn_commit.symbol not in self._marks

    # Create the mark, only after the check above
    mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)

    if is_initial_lod_creation:
      # Get the primary parent
      p_source_revnum, p_source_lod, p_cvs_symbols = source_groups[0]
      try:
        p_source_node = self._mirror.get_old_lod_directory(
            p_source_lod, p_source_revnum
            )
      except KeyError:
        raise InternalError('Source %r does not exist' % (p_source_lod,))

      # Everything in the primary parent that is not part of the
      # symbol has to be deleted again:
      cvs_files_to_delete = set(self._get_all_files(p_source_node))
      for (source_revnum, source_lod, cvs_symbols,) in source_groups:
        for cvs_symbol in cvs_symbols:
          cvs_files_to_delete.discard(cvs_symbol.cvs_file)

      # The primary parent is reported as the sprout point, so only
      # the remaining groups are listed as cherrypicks:
      cherrypick_groups = source_groups[1:]
    else:
      cherrypick_groups = source_groups[0:]

    # Write a trailer to the log message which describes the cherrypicks that
    # make up this symbol creation.
    log_msg += "\n"
    if is_initial_lod_creation:
      log_msg += "\nSprout from %s" % (
          self._describe_commit(
              Ctx()._persistence_manager.get_svn_commit(p_source_revnum),
              p_source_lod
              ),
          )
    for (source_revnum, source_lod, cvs_symbols,) in cherrypick_groups:
      log_msg += "\nCherrypick from %s:" % (
          self._describe_commit(
              Ctx()._persistence_manager.get_svn_commit(source_revnum),
              source_lod
              ),
          )
      for cvs_path in sorted(
            cvs_symbol.cvs_file.cvs_path for cvs_symbol in cvs_symbols
            ):
        log_msg += "\n %s" % (cvs_path,)
    if is_initial_lod_creation:
      if cvs_files_to_delete:
        log_msg += "\nDelete:"
        for cvs_path in sorted(
              cvs_file.cvs_path for cvs_file in cvs_files_to_delete
              ):
          log_msg += "\n %s" % (cvs_path,)

    self._write_commit_header(
        git_branch, mark, author, svn_commit.date, log_msg
        )

    # Only record actual DVCS ancestry for the primary sprout parent,
    # all the rest are effectively cherrypicks.
    if is_initial_lod_creation:
      self.f.write(
          'from :%d\n'
          % (self._get_source_mark(p_source_lod, p_source_revnum),)
          )

    for (source_revnum, source_lod, cvs_symbols,) in source_groups:
      for cvs_symbol in cvs_symbols:
        self.revision_writer.branch_file(cvs_symbol)

    if is_initial_lod_creation:
      for cvs_file in cvs_files_to_delete:
        self.f.write('D %s\n' % (cvs_file.cvs_path,))

    self.f.write('\n')
    return mark

  def _process_simple_copy(self, svn_commit, source_group):
    """Create the symbol of SVN_COMMIT as a plain copy of SOURCE_GROUP.

    SOURCE_GROUP is a (revnum, lod, cvs_symbols) tuple.  No commit is
    written; the symbol's ref is just pointed at the mark that was
    active on the source LOD, and the mirror and the mark bookkeeping
    are updated to match."""

    (source_revnum, source_lod, cvs_symbols) = source_group
    logger.debug(
        '%s will be created via a simple copy from %s:r%d'
        % (svn_commit.symbol, source_lod, source_revnum,)
        )
    mark = self._get_source_mark(source_lod, source_revnum)
    self._set_symbol(svn_commit.symbol, mark)
    self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
    self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)

  def process_branch_commit(self, svn_commit):
    """Create or fix up the branch of SVN_COMMIT.

    A simple copy is used where possible; otherwise a fixup commit is
    written directly on the branch."""

    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      self._process_simple_copy(svn_commit, source_groups[0])
    else:
      logger.debug(
          '%s will be created via fixup commit(s)' % (svn_commit.symbol,)
          )
      self._process_symbol_commit(
          svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
          source_groups,
          )

    self._mirror.end_commit()

  def _set_symbol(self, symbol, mark):
    """Point the ref of SYMBOL at MARK using a 'reset' command.

    Branches live under refs/heads and tags under refs/tags.  Raise
    InternalError if SYMBOL is neither a Branch nor a Tag."""

    if isinstance(symbol, Branch):
      category = 'heads'
    elif isinstance(symbol, Tag):
      category = 'tags'
    else:
      raise InternalError()
    self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
    self.f.write('from :%d\n' % (mark,))

  def get_tag_fixup_branch_name(self, svn_commit):
    """Return the name of the ref used for tag fixup commits."""

    # The branch name to use for the "tag fixup branches".  The
    # git-fast-import documentation suggests using 'TAG_FIXUP'
    # (outside of the refs/heads namespace), but this is currently
    # broken.  Use a name containing '.', which is not allowed in CVS
    # symbols, to avoid conflicts (though of course a conflict could
    # still result if the user requests symbol transformations).
    return 'refs/heads/TAG.FIXUP'

  def process_tag_commit(self, svn_commit):
    """Create the tag of SVN_COMMIT.

    A simple copy is used where possible.  Otherwise the content of
    the tag is committed to the tag fixup branch, the tag is pointed
    at that commit and the fixup branch is reset.  If
    tie_tag_fixup_branches is set, a merge commit linking the fixup
    commit back into the source branch is written as well."""

    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      self._process_simple_copy(svn_commit, source_groups[0])
    else:
      logger.debug(
          '%s will be created via a fixup branch' % (svn_commit.symbol,)
          )

      fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

      # Create the fixup branch (which might involve making more than
      # one commit):
      mark = self._process_symbol_commit(
          svn_commit, fixup_branch_name, source_groups
          )

      # Store the mark of the last commit to the fixup branch as the
      # value of the tag:
      self._set_symbol(svn_commit.symbol, mark)
      # The fixup ref itself is then reset (no 'from' line is given):
      self.f.write('reset %s\n' % (fixup_branch_name,))
      self.f.write('\n')

      if self.tie_tag_fixup_branches:
        source_lod = source_groups[0][1]
        # A LOD without a 'name' attribute is written to master:
        source_lod_git_branch = \
            'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

        mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
        author = self._map_author(Ctx().username)
        log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

        self._write_commit_header(
            source_lod_git_branch, mark2, author, svn_commit.date, log_msg
            )
        self.f.write(
            'merge :%d\n'
            % (mark,)
            )
        self.f.write('\n')

    self._mirror.end_commit()

  def _get_log_msg_for_ancestry_tie(self, svn_commit):
    """Return the wrapped log message for an ancestry-tying commit.

    The message is Ctx().tie_tag_ancestry_message with the name of the
    commit's symbol filled in as 'symbol_name'."""

    return Ctx().text_wrapper.fill(
        Ctx().tie_tag_ancestry_message % {
            'symbol_name' : svn_commit.symbol.name,
            }
        )

  def cleanup(self):
    """Finish the base class and the writer, and close the dump file.

    The file is closed even if one of the earlier steps raises, so
    that a failure does not leak the open file."""

    try:
      DVCSOutputOption.cleanup(self)
      self.revision_writer.finish()
    finally:
      self.f.close()
      del self.f