f28701961ab8be4a29408e59e554a6f62191527e
[darcs2git.git] / darcs2git.py
blobf28701961ab8be4a29408e59e554a6f62191527e
1 #! /usr/bin/python
3 """
5 darcs2git -- Darcs to git converter.
7 Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 """
26 # TODO:
28 # - time zones
30 # - file modes
32 # - use binary search to find from-patch in case of conflict.
35 import urlparse
36 import distutils.version
37 import glob
38 import os
39 import sys
40 import time
41 import xml.dom.minidom
42 import re
43 import gdbm as dbmodule
44 import gzip
45 import optparse
47 ################################################################
48 # globals
# Open file receiving every shell command run through system (); stays
# None unless get_cli_options () opens it for --debug.
log_file = None

# Parsed command-line options; assigned by get_cli_options ().
options = None

# EMAIL -> NAME, filled from the file given with --authors.
mail_to_name_dict = {}

# patch number -> DarcsPatch for exported patches that no later commit
# has used as a parent yet (see export_commit and export_pending).
pending_patches = {}

# patch number -> GitCommit for every patch exported so far.
git_commits = {}

# Not referenced by any code visible in this file.
used_tags = {}
58 ################################################################
59 # utils
class PullConflict (Exception):
    """Raised by DarcsConversionRepo.go_from_to when a patch cannot be
    pulled on top of the chosen parent without a conflict."""
class CommandFailed (Exception):
    """Raised by read_pipe and system when a shell command exits with a
    non-zero status and errors are not being ignored."""
def progress (s):
    """Write the message S, followed by a newline, to standard error."""
    line = ''.join ([s, '\n'])
    sys.stderr.write (line)
def get_cli_options ():
    """Parse the command line and return the tuple (options, args).

    Side effects:
      * the parsed options are also stored in the module-level `options';
      * every --authors file is merged into `mail_to_name_dict';
      * with --debug, a shell log is opened and stored in `log_file'.

    Prints the help and exits with status 2 when no repository argument
    is given.  Raises NotImplementedError when the repository argument
    names a network host, and RuntimeError when no git version >= 1.5.0
    can be detected.
    """
    class MyOP (optparse.OptionParser):
        """OptionParser whose --help output ends with a longer description."""
        def print_help (self):
            optparse.OptionParser.print_help (self)
            sys.stdout.write ('''
DESCRIPTION

This tool is a conversion utility for Darcs repositories, importing
them in chronological order. It requires a Git version that has
git-fast-import. It does not support incremental updating.

BUGS

* repositories with skewed timestamps, or different patches with
equal timestamps will confuse darcs2git.
* does not respect file modes or time zones.
* too slow. See source code for instructions to speed it up.
* probably doesn\'t work on partial repositories

Report new bugs to hanwen@xs4all.nl

LICENSE

Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>.
Distributed under terms of the GNU General Public License
This program comes with NO WARRANTY.
''' + '\n')

    p = MyOP ()

    p.usage='''darcs2git [OPTIONS] DARCS-REPO'''
    p.description='''Convert darcs repo to git.'''

    def update_map (option, opt, value, parser):
        # optparse callback for --authors: VALUE is the path of a text
        # file with one EMAIL=NAME entry per line.
        author_file = open (value)
        try:
            for line in author_file:
                line = line.strip ()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue

                # Split on the first '=' only, so that the name itself
                # may contain '='.
                (mail, name) = line.split ('=', 1)
                mail_to_name_dict[mail.strip ()] = name.strip ()
        finally:
            author_file.close ()

    p.add_option ('-a', '--authors', action='callback',
                  callback=update_map,
                  type='string',
                  nargs=1,
                  help='read a text file, containing EMAIL=NAME lines')

    p.add_option ('--checkpoint-frequency', action='store',
                  dest='checkpoint_frequency',
                  type='int',
                  default=0,
                  help='how often should the git importer be synced?\n'
                  'Default is 0 (no limit)'
                  )

    p.add_option ('-d', '--destination', action='store',
                  type='string',
                  default='',
                  dest='target_git_repo',
                  help='where to put the resulting Git repo.')

    p.add_option ('--verbose', action='store_true',
                  dest='verbose',
                  default=False,
                  help='show commands as they are invoked')

    p.add_option ('--history-window', action='store',
                  dest='history_window',
                  type='int',
                  default=0,
                  help='Look back this many patches as conflict ancestors.\n'
                  'Default is 0 (no limit)')

    p.add_option ('--debug', action='store_true',
                  dest='debug',
                  default=False,
                  help="""add patch numbers to commit messages;
                  don\'t clean conversion repo;
                  test end result.""")

    global options
    options, args = p.parse_args ()
    if not args:
        p.print_help ()
        sys.exit (2)

    # urlparse always returns a 6-tuple, so testing its length can never
    # detect a remote repository.  A non-empty network location (index 1)
    # is what distinguishes a URL such as http://host/repo from a path.
    if urlparse.urlparse (args[0])[1]:
        raise NotImplementedError ("We support local DARCS repos only.")

    # Pick the dotted number out of the `git --version' output instead of
    # relying on it being the last word: some builds append a vendor
    # suffix after the version.
    version_pipe = os.popen ("git --version", "r")
    version_output = version_pipe.read ()
    version_pipe.close ()
    version_match = re.search (r'(\d+(?:\.\d+)+)', version_output)
    ideal_version = distutils.version.LooseVersion ("1.5.0")
    if (not version_match
        or distutils.version.LooseVersion (version_match.group (1)) < ideal_version):
        raise RuntimeError ("You need git >= 1.5.0 for this.")

    options.basename = os.path.basename (os.path.normpath (args[0])).replace ('.darcs', '')
    if not options.target_git_repo:
        options.target_git_repo = options.basename + '.git'

    if options.debug:
        global log_file
        name = options.target_git_repo.replace ('.git', '.log')
        if name == options.target_git_repo:
            # The target name contained no '.git' to substitute.
            name += '.log'

        progress ("Shell log to %s" % name)
        log_file = open (name, 'w')

    return (options, args)
def read_pipe (cmd, ignore_errors=False):
    """Run the shell command CMD and return all of its standard output.

    The command is echoed through progress () when --verbose is set.
    Raises CommandFailed when the command exits with a non-zero status,
    unless IGNORE_ERRORS is true.
    """
    if options.verbose:
        progress ('pipe %s' % cmd)

    stream = os.popen (cmd)
    output = stream.read ()

    # close () returns None on success and the exit status otherwise.
    status = stream.close ()
    if status and not ignore_errors:
        raise CommandFailed ("Pipe failed: %s" % cmd)

    return output
def system (c, ignore_error=0, timed=0):
    """Run the shell command C through os.system.

    With TIMED, the command is prefixed with "time ".  The command is
    echoed through progress () when --verbose is set, and appended to the
    shell log when one is open.  Raises CommandFailed on a non-zero exit
    status unless IGNORE_ERROR is true.
    """
    if timed:
        c = "time " + c

    if options.verbose:
        progress (c)

    if log_file:
        log_file.write ('%s\n' % c)
        # Flush at once so the log is complete even if the command hangs
        # or the conversion is interrupted.
        log_file.flush ()

    status = os.system (c)
    if not status or ignore_error:
        return

    raise CommandFailed ("Command failed: %s" % c)
def darcs_date_to_git (x):
    """Convert the darcs patch date X to seconds since the epoch.

    X has the form YYYYMMDDHHMMSS.  Darcs records patch dates in UTC, so
    the broken-down time is converted with calendar.timegm; time.mktime
    would read it as local time and shift every commit by the UTC offset
    of the machine running the conversion.

    Returns the number of seconds as a decimal string.  Raises ValueError
    when X does not match the expected format.
    """
    import calendar

    t = time.strptime (x, '%Y%m%d%H%M%S')
    return '%d' % calendar.timegm (t)
def darcs_timezone (x) :
    """Return the git time zone offset for the darcs local date X.

    X is only parsed as a sanity check (time.strptime raises ValueError
    when it does not match); the parsed value is not used and the result
    is always "+0100".
    """
    local_date_format = '%a %b %d %H:%M:%S %Z %Y'
    time.strptime (x, local_date_format)

    # todo: derive the offset from X instead of hard-coding it.
    return "+0100"
209 ################################################################
210 # darcs
class DarcsConversionRepo:
    """Representation of a Darcs repo.

    The repo is thought to be ordered, and supports methods for
    going back (obliterate) and forward (pull).
    """

    def __init__ (self, dir, patches):
        """Describe a scratch repo in DIR that replays PATCHES.

        PATCHES is the ordered list of DarcsPatch objects.  Nothing is
        created on disk here; see create_fresh and start_at.
        """
        self.dir = os.path.abspath (dir)
        self.patches = patches

        # Number of the patch the repo was last moved to.
        self._current_number = -1

        # NOTE(review): -1 is truthy, so `not self._is_valid' treats a
        # repo that was never created as valid -- confirm this is meant.
        self._is_valid = -1

        # Cache for inventory_dict (); None means "reread from disk".
        self._inventory_dict = None

        self._short_id_dict = dict ((p.short_id (), p) for p in patches)

    def __del__ (self):
        if not options.debug:
            system ('rm -fr %s' % self.dir)

    def is_contiguous (self):
        """Return true when the repo holds exactly the patches numbered
        0 .. current number."""
        return (len (self.inventory_dict ()) == self._current_number+1
                and self.contains_contiguous (self._current_number))

    def contains_contiguous (self, num):
        """Return True when the repo exists on disk, is valid, and holds
        every patch numbered 0 .. NUM."""
        if not self._is_valid:
            return False

        darcs_dir = self.dir + '/_darcs'
        if not os.path.exists (darcs_dir):
            return False

        for p in self.patches[:num + 1]:
            if not self.has_patch (p):
                return False

        return True

    def has_patch (self, p):
        """Return whether the inventory lists the patch P."""
        assert self._is_valid

        return p.short_id () in self.inventory_dict ()

    def pristine_tree (self):
        """Return the path of the pristine tree inside the repo."""
        return self.dir + '/_darcs/pristine'

    def go_back_to (self, dest):
        """Obliterate the newest patches so that patch DEST is the last."""

        # at 4, len = 5, go to 2: count == 2
        count = len (self.inventory_dict()) - dest - 1

        assert self._is_valid
        assert count > 0

        self.checkout ()

        progress ('Rewinding %d patches' % count)
        system ('cd %(dir)s && echo ay|darcs obliterate --ignore-times --last %(count)d'
                % {'dir': self.dir, 'count': count})

        # Forget the obliterated patches in the cached inventory too.
        d = self.inventory_dict ()
        for p in self.patches[dest+1:self._current_number+1]:
            try:
                del d[p.short_id ()]
            except KeyError:
                pass

        self._current_number = dest

    def clean (self):
        """Remove the repo directory."""
        system ('rm -rf %s' % self.dir)

    def checkout (self):
        """Copy the pristine tree over the working directory with rsync."""
        system ('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/' % {'dir': self.dir})

    def pull (self, patch):
        """Pull PATCH, selected by hash, from the repo it was read from."""
        progress ('Pull patch %d' % patch.number)
        system ('cd %(dir)s && darcs pull --ignore-times --quiet --all --match "hash %(id)s" %(source_repo)s '
                % {'dir': self.dir,
                   'id': patch.attributes['hash'],
                   'source_repo': patch.dir})

        self._current_number = patch.number

        ## must reread: the pull may have pulled in others.
        self._inventory_dict = None

    def go_forward_to (self, num):
        """Pull every patch numbered 0 .. NUM that the repo lacks, using
        a single darcs invocation."""
        d = self.inventory_dict ()

        pull_me = []

        ## ugh
        for p in self.patches[0:num+1]:
            if p.short_id () not in d:
                pull_me.append (p)
                d[p.short_id ()] = p

        pull_str = ' || '.join (['hash %s' % p.id () for p in pull_me])

        progress ('Pulling %d patches to go to %d' % (len (pull_me), num))
        system ('darcs pull --all --repo %(dir)s --match "%(pull_str)s" %(src)s'
                % {'dir': self.dir,
                   'pull_str': pull_str,
                   'src': self.patches[0].dir})

    def create_fresh (self):
        """Replace the repo directory with a newly initialised, empty
        darcs repo and mark it valid."""
        system ('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
                % {'dir': self.dir})
        self._is_valid = True
        self._current_number = -1
        self._inventory_dict = {}

    def inventory (self):
        """Return the concatenated text of _darcs/inventory and of every
        file in _darcs/inventories."""
        darcs_dir = self.dir + '/_darcs'
        chunks = []
        for f in [darcs_dir + '/inventory'] + glob.glob (darcs_dir + '/inventories/*'):
            handle = open (f)
            try:
                chunks.append (handle.read ())
            finally:
                handle.close ()
        return ''.join (chunks)

    def inventory_dict (self):
        """Return the mapping short id -> DarcsPatch for the patches in
        the inventory.

        The mapping is cached; it is parsed from disk again after the
        cache has been reset to None.  Raises KeyError when the inventory
        names a patch that is not among the known patches.
        """
        if not isinstance (self._inventory_dict, dict):
            self._inventory_dict = {}

            for m in re.finditer (r'\n([^*\n]+\*[*-][0-9]+)', self.inventory ()):
                short_id = m.group (1)
                self._inventory_dict[short_id] = self._short_id_dict[short_id]

        return self._inventory_dict

    def start_at (self, num):
        """Move the repo to NUM.

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'
        """
        progress ('Starting afresh at %d' % num)

        self.create_fresh ()
        dir = self.dir
        iv = open (dir + '/_darcs/inventory', 'w')
        if log_file:
            # Terminate the line: system () logs the next command right
            # after this note.
            log_file.write ("# messing with _darcs/inventory\n")

        try:
            for p in self.patches[:num+1]:
                os.link (p.filename (), dir + '/_darcs/patches/' + os.path.basename (p.filename ()))
                iv.write (p.header ())
                self._inventory_dict[p.short_id ()] = p
        finally:
            iv.close ()

        system ('darcs repair --repo %(dir)s --quiet' % {'dir': dir})
        self.checkout ()
        self._current_number = num
        self._is_valid = True

    def go_to (self, dest):
        """Move the repo to patch DEST by the cheapest applicable route:
        stay put, rewind, pull the missing patches, or rebuild."""
        contiguous = self.is_contiguous ()

        if not self._is_valid:
            self.start_at (dest)
        elif dest == self._current_number and contiguous:
            pass
        elif (self.contains_contiguous (dest)):
            self.go_back_to (dest)
        elif dest - len (self.inventory_dict ()) < dest // 100:
            # `//' keeps the integer division the heuristic relies on.
            self.go_forward_to (dest)
        else:
            self.start_at (dest)

    def go_from_to (self, from_patch, to_patch):
        """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise
        PullConflict if conflict is detected

        A false FROM_PATCH means: start from an empty repo.  When the
        darcs command itself fails, the repo is marked invalid before
        PullConflict is raised.
        """
        progress ('Trying %s -> %s' % (from_patch, to_patch))

        if from_patch:
            self.go_to (from_patch.number)
        else:
            self.create_fresh ()

        try:
            self.pull (to_patch)
            success = 'No conflicts to resolve' in read_pipe ('cd %(dir)s && echo y|darcs resolve'
                                                             % {'dir': self.dir})
        except CommandFailed:
            self._is_valid = False
            raise PullConflict ()

        if not success:
            raise PullConflict ()
class DarcsPatch:
    """One <patch> element of the `darcs changes --xml' output, together
    with the repo directory it was read from."""

    def __repr__ (self):
        return 'patch %d' % self.number

    def __init__ (self, xml, dir):
        """Wrap the DOM element XML of a patch that lives in repo DIR.

        Sets author_name, author_mail, message and date as a side
        effect; `number' stays -1 until the caller assigns it.
        """
        self.xml = xml
        self.dir = dir
        self.number = -1
        self.attributes = {}

        # Cache for contents (); None until the patch file has been read.
        self._contents = None
        for (nm, value) in xml.attributes.items():
            self.attributes[nm] = value

        # fixme: ugh attributes vs. methods.
        self.extract_author ()
        self.extract_message ()
        self.extract_time ()

    def id (self):
        """Return the value of the `hash' attribute."""
        return self.attributes['hash']

    def short_id (self):
        """Return the key used to look the patch up in an inventory:
        author, '*', then '-' for an inverted patch and '*' otherwise,
        then the part of the hash before its first '-'."""
        inv = '*'
        if self.attributes['inverted'] == 'True':
            inv = '-'

        return '%s*%s%s' % (self.attributes['author'], inv, self.attributes['hash'].split ('-')[0])

    def filename (self):
        """Return the path of the patch file inside the source repo."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents (self):
        """Return the gunzipped text of the patch file, reading it on
        first use only."""
        if self._contents is None:
            f = gzip.open (self.filename ())
            try:
                self._contents = f.read ()
            finally:
                f.close ()

        return self._contents

    def header (self):
        """Return the leading part of the patch file up to and including
        the closing ']', in the form start_at writes to the inventory.

        The result always ends with a newline.
        """
        lines = self.contents ().split ('\n')

        name = lines[0]
        committer = lines[1] + '\n'
        committer = re.sub ('] {\n$', ']\n', committer)
        committer = re.sub ('] *\n$', ']\n', committer)
        comment = ''
        if not committer.endswith (']\n'):
            # The header continues: collect the comment lines up to the
            # line that closes the bracket.
            for l in lines[2:]:
                # startswith, unlike l[0], copes with an empty line.
                if l.startswith (']'):
                    comment += ']\n'
                    break
                comment += l + '\n'

        header = name + '\n' + committer
        if comment:
            header += comment

        assert header[-1] == '\n'
        return header

    def extract_author (self):
        """Set author_name and author_mail from the `author' attribute.

        "NAME <MAIL>" is split into its parts.  Otherwise the whole
        attribute is the mail address and the name comes from
        mail_to_name_dict, falling back to the text before '@'.
        """
        mail = self.attributes['author']
        name = ''
        m = re.search ("^(.*) <(.*)>$", mail)

        if m:
            name = m.group (1)
            mail = m.group (2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split ('@')[0]

        self.author_name = name
        self.author_mail = mail

    def extract_time (self):
        """Set `date' to "<seconds> <zone>", built from the `date' and
        `local_date' attributes."""
        self.date = darcs_date_to_git (self.attributes['date']) + ' ' + darcs_timezone (self.attributes['local_date'])

    def name (self):
        """Return the text of the <name> child element, or
        '(no comment)' when the element is missing or empty."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName ('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            pass
        return patch_name

    def extract_message (self):
        """Set `message' to the patch name, a blank line and the long
        comment.  The name of an inverted patch gets the prefix "UNDO: "."""
        patch_name = self.name ()
        comment_elts = self.xml.getElementsByTagName ('comment')
        comment = ''
        # An empty <comment/> element has no child nodes to read.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        if self.attributes['inverted'] == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = '%s\n\n%s' % (patch_name, comment)

    def tag_name (self):
        """Return the tag name when the patch name starts with "TAG ",
        with whitespace and colons replaced by '_'; '' otherwise."""
        patch_name = self.name ()
        if patch_name.startswith ("TAG "):
            return re.sub (r'[\s:]', '_', patch_name[4:])
        return ''
def get_darcs_patches (darcs_repo):
    """Return the patches of DARCS_REPO as a list of DarcsPatch objects.

    The list is in the order printed by `darcs changes --reverse', and
    each patch gets its list index as `number'.
    """
    progress ('reading patches.')

    changes_xml = read_pipe ('darcs changes --xml --reverse --repo ' + darcs_repo)
    document = xml.dom.minidom.parseString (changes_xml)

    patches = []
    for element in document.documentElement.getElementsByTagName ('patch'):
        patch = DarcsPatch (element, darcs_repo)
        patch.number = len (patches)
        patches.append (patch)

    return patches
537 ################################################################
538 # GIT export
class GitCommit:
    """A node of the exported commit graph: a darcs patch, the commit it
    was attached to, and its distance from the root of that chain."""

    def __init__ (self, parent, darcs_patch):
        """PARENT is the parent GitCommit, or None for a root commit."""
        self.parent = parent
        self.darcs_patch = darcs_patch

        # A root has depth 0; every other commit is one deeper than its
        # parent.
        self.depth = 0
        if parent:
            self.depth = parent.depth + 1

    def number (self):
        """Return the number of the wrapped darcs patch."""
        return self.darcs_patch.number

    def parent_patch (self):
        """Return the darcs patch of the parent, or None for a root."""
        if not self.parent:
            return None
        return self.parent.darcs_patch
def common_ancestor (a, b):
    """Return the deepest commit that is an ancestor of (or equal to)
    both A and B, or None when their parent chains never meet."""

    # Climb from the deeper commit until both are at the same depth.
    while a.depth != b.depth:
        if a.depth < b.depth:
            b = b.parent
        else:
            a = a.parent

    # Climb in lockstep until the chains meet or one of them runs out.
    while a and b and a != b:
        a, b = a.parent, b.parent

    if a and b:
        return a
    return None
def export_checkpoint (gfi):
    """Write a `checkpoint' command, followed by a blank line, to the
    git-fast-import stream GFI."""
    command = 'checkpoint\n\n'
    gfi.write (command)
def export_tree (tree, gfi):
    """Write every file below the directory TREE to the git-fast-import
    stream GFI, as the complete file list of the current commit.

    The output is a `deleteall' command, one inline `M 644' entry per
    file with its path relative to TREE, and a closing blank line.
    All files are written with mode 644.
    """
    tree = os.path.normpath (tree)
    prefix = tree + '/'
    gfi.write ('deleteall\n')
    for (root, dirs, files) in os.walk (tree):
        for f in files:
            rf = os.path.normpath (os.path.join (root, f))

            source = open (rf)
            try:
                s = source.read ()
            finally:
                source.close ()

            # Remove TREE only where it leads the path.  Replacing every
            # occurrence of the prefix would also eat matching text in
            # the middle of a path (with TREE == '.', "a./b" became "ab").
            if rf.startswith (prefix):
                rf = rf[len (prefix):]

            gfi.write ('M 644 inline %s\n' % rf)
            gfi.write ('data %d\n%s\n' % (len (s), s))
    gfi.write ('\n')
def export_commit (repo, patch, last_patch, gfi):
    """Write the commit for PATCH to the git-fast-import stream GFI.

    REPO is the conversion repo with PATCH applied; its pristine tree
    becomes the content of the commit.  LAST_PATCH is the patch PATCH was
    pulled onto, or None.

    The commit goes to the branch refs/heads/darcstmp<number> and gets
    the mark <number>+1.  Its parents are the pending patches present in
    REPO; when there are none, LAST_PATCH is the parent, provided it was
    exported itself.  `pending_patches' and `git_commits' are updated.
    """
    gfi.write ('commit refs/heads/darcstmp%d\n' % patch.number)
    gfi.write ('mark :%d\n' % (patch.number + 1))
    gfi.write ('committer %s <%s> %s\n' % (patch.author_name,
                                           patch.author_mail,
                                           patch.date))

    msg = patch.message
    if options.debug:
        msg += '\n\n#%d\n' % patch.number

    gfi.write ('data %d\n%s\n' % (len (msg), msg))

    # Every pending patch that REPO contains is merged by this commit and
    # therefore stops being pending.
    mergers = [number for (number, pending) in pending_patches.items ()
               if repo.has_patch (pending)]
    for number in mergers:
        del pending_patches[number]

    if not mergers and last_patch and last_patch.number in git_commits:
        mergers = [last_patch.number]

    if mergers:
        gfi.write ('from :%d\n' % (mergers[0] + 1))
        for m in mergers[1:]:
            gfi.write ('merge :%d\n' % (m + 1))

    pending_patches[patch.number] = patch
    export_tree (repo.pristine_tree (), gfi)

    parent_number = -1
    if last_patch:
        parent_number = last_patch.number
    git_commits[patch.number] = GitCommit (git_commits.get (parent_number, None),
                                           patch)
def export_pending (gfi):
    """Create the branch `master' from the patches still pending.

    With one pending patch, master is reset to its commit.  With several,
    each gets a branch master<number>, and master gets a "tie together"
    commit that has all of them as parents; committer and date of that
    commit are taken from the first pending patch.  With none (nothing
    was exported), no branch is created.
    """
    patches = list (pending_patches.values ())
    if not patches:
        # There is no commit that master could point to.
        progress ("No pending patches, not creating branch master")
        return

    if len (patches) == 1:
        gfi.write ('reset refs/heads/master\n')
        gfi.write ('from :%d\n\n' % (patches[0].number+1))

        progress ("Creating branch master")
        return

    for (n, p) in list (pending_patches.items ()):
        gfi.write ('reset refs/heads/master%d\n' % n)
        gfi.write ('from :%d\n\n' % (n+1))

        progress ("Creating branch master%d" % n)

    patch = patches[0]
    gfi.write ('commit refs/heads/master\n')
    gfi.write ('committer %s <%s> %s\n' % (patch.author_name,
                                           patch.author_mail,
                                           patch.date))
    msg = 'tie together'
    gfi.write ('data %d\n%s\n' % (len(msg), msg))
    gfi.write ('from :%d\n' % (patch.number + 1))
    for p in patches[1:]:
        gfi.write ('merge :%d\n' % (p.number + 1))
    gfi.write ('\n')
def export_tag (patch, gfi):
    """Write a tag for PATCH to the git-fast-import stream GFI.

    The tag is named after PATCH.tag_name (), points at the mark
    <number>+1, and carries the author, date and message of PATCH.
    """
    tagger = (patch.author_name, patch.author_mail, patch.date)
    message = patch.message

    gfi.write (''.join (['tag %s\n' % patch.tag_name (),
                         'from :%d\n' % (patch.number + 1),
                         'tagger %s <%s> %s\n' % tagger,
                         'data %d\n%s\n' % (len (message), message)]))
669 ################################################################
670 # main.
def test_conversion (darcs_repo, git_repo):
    """Compare a checkout of GIT_REPO with the pristine tree of
    DARCS_REPO and report the outcome through progress ().

    Does nothing beyond printing a notice when DARCS_REPO has no
    _darcs/pristine directory.  A diff longer than 1024 characters is
    reported by its first and last 512 characters only.
    """
    pristine = '%(darcs_repo)s/_darcs/pristine' % locals ()
    if not os.path.exists (pristine):
        progress ("darcs repository does not contain pristine tree?!")
        return

    gd = options.basename + '.checkouttmp.git'
    system ('rm -rf %(gd)s && git clone %(git_repo)s %(gd)s' % locals ())
    diff_cmd = 'diff --exclude .git -urN %(gd)s %(pristine)s' % locals ()

    # The checkout has to exist while diff runs; remove it afterwards,
    # also when the diff fails.
    try:
        diff = read_pipe (diff_cmd, ignore_errors=True)
    finally:
        system ('rm -rf %(gd)s' % locals ())

    if diff:
        if len (diff) > 1024:
            # Keep head and tail; diff[512:] would keep everything.
            diff = diff[:512] + '\n...\n' + diff[-512:]

        progress ("Conversion introduced changes: %s" % diff)
    else:
        progress ("Checkout matches pristine darcs tree.")
def main ():
    """Convert the darcs repo named on the command line to a git repo.

    An existing target directory is removed first.  Patches are replayed
    in order in a scratch darcs repo; for each patch a parent is searched
    under which it pulls without conflict, and the resulting pristine
    tree is fed to git fast-import as a commit.
    """
    (options, args) = get_cli_options ()

    darcs_repo = os.path.abspath (args[0])
    git_repo = os.path.abspath (options.target_git_repo)

    if os.path.exists (git_repo):
        system ('rm -rf %(git_repo)s' % locals ())

    system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals ())

    # `git config' and `git fast-import' are the spellings that current
    # git accepts; git >= 1.5.0, which get_cli_options () insists on,
    # accepts them as well.
    system ('git --git-dir %(git_repo)s config core.logAllRefUpdates false' % locals ())

    os.environ['GIT_DIR'] = git_repo

    gfi = os.popen ('git fast-import --quiet', 'w')

    patches = get_darcs_patches (darcs_repo)
    conv_repo = DarcsConversionRepo (options.basename + ".tmpdarcs", patches)
    conv_repo.start_at (-1)

    for p in patches:

        parent_patch = None
        parent_number = -1

        # Candidate parents: the common ancestors of every pair of pending
        # patches.  Pairs without a common ancestor are skipped, and each
        # ancestor is tried once only.
        pending = list (pending_patches.values ())
        ancestors = {}
        for v in pending:
            for w in pending:
                ancestor = common_ancestor (git_commits[v.number],
                                            git_commits[w.number])
                if ancestor is not None:
                    ancestors[ancestor.darcs_patch.number] = ancestor

        # Try the candidate with the highest patch number first.
        candidate_numbers = sorted (ancestors.keys ())
        candidate_numbers.reverse ()
        for number in candidate_numbers:
            q = ancestors[number].darcs_patch
            try:
                conv_repo.go_from_to (q, p)

                parent_patch = q
                parent_number = q.number
                progress ('Found existing common parent as predecessor')
                break

            except PullConflict:
                pass

        ## no branches found where we could attach.
        ## try previous commits one by one.
        if not parent_patch:
            parent_number = p.number - 2
            while 1:
                if parent_number >= 0:
                    parent_patch = patches[parent_number]

                try:
                    conv_repo.go_from_to (parent_patch, p)
                    break
                except PullConflict:

                    ## simplistic, may not be enough.
                    progress ('conflict, going one back')
                    parent_number -= 1

                    if parent_number < 0:
                        break

                    if (options.history_window
                        and parent_number < p.number - options.history_window):

                        # Outside the window: -2 marks the search as
                        # given up.
                        parent_number = -2
                        break

        if parent_number >= 0 or p.number == 0:
            progress ('Export %d -> %d (total %d)' % (parent_number,
                                                      p.number, len (patches)))
            export_commit (conv_repo, p, parent_patch, gfi)
            if p.tag_name ():
                export_tag (p, gfi)

            if options.checkpoint_frequency and p.number % options.checkpoint_frequency == 0:
                export_checkpoint (gfi)
        else:
            progress ("Can't import patch %d, need conflict resolution patch?" % p.number)

    export_pending (gfi)
    gfi.close ()

    # The per-patch helper branches are not part of the result.
    for f in glob.glob ('%(git_repo)s/refs/heads/darcstmp*' % locals ()):
        os.unlink (f)

    test_conversion (darcs_repo, git_repo)

    if not options.debug:
        conv_repo.clean ()
# Run the conversion only when executed as a script, so that the module
# can be imported without side effects.
if __name__ == '__main__':
    main ()