test_conversion thinkos.
[darcs2git.git] / darcs2git.py
blob847e1fca5e7e02a1b643e1c53c7886060eba8066
1 #! /usr/bin/python
3 """
5 darcs2git -- Darcs to git converter.
7 Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 """
26 # TODO:
28 # - time zones
30 # - file modes
32 # - use binary search to find from-patch in case of conflict.
35 import urlparse
36 import distutils.version
37 import glob
38 import os
39 import sys
40 import time
41 import xml.dom.minidom
42 import re
43 import gzip
44 import optparse
################################################################
# globals

# File object that receives a copy of every shell command; opened by
# get_cli_options () when --debug is given, None otherwise.
log_file = None

# optparse result, filled in by get_cli_options ().
options = None

# EMAIL -> NAME, loaded from the --authors file.
mail_to_name_dict = dict ()

# patch number -> DarcsPatch, for exported patches that no later
# commit has merged yet.
pending_patches = dict ()

# patch number -> GitCommit, for every exported patch.
git_commits = dict ()

# Not read or written by any code visible in this file.
used_tags = dict ()
57 ################################################################
58 # utils
class PullConflict (Exception):
    """Raised by DarcsConversionRepo.go_from_to () when pulling a
    patch fails or leaves conflicts behind."""
class CommandFailed (Exception):
    """Raised by system () and read_pipe () when a shell command
    exits with a non-zero status."""
def progress (s):
    """Write the status message S, followed by a newline, to stderr."""
    line = ''.join ((s, '\n'))
    sys.stderr.write (line)
def get_cli_options ():
    """Parse the command line and return (options, args).

    Side effects:
      * stores the parsed options in the global `options', adding
        `basename' and a default for `target_git_repo';
      * fills `mail_to_name_dict' from the --authors file;
      * with --debug, opens the global `log_file'.

    Exits with status 2 after printing help when no repository is
    given.  Raises NotImplementedError for a repository URL with a
    network location and RuntimeError when git is older than 1.5.0.
    """
    global options, log_file

    class MyOP (optparse.OptionParser):
        def print_help (self, file=None):
            optparse.OptionParser.print_help (self, file)
            (file or sys.stdout).write ('''
DESCRIPTION

This tool is a conversion utility for Darcs repositories, importing
them in chronological order. It requires a Git version that has
git-fast-import. It does not support incremental updating.

BUGS

 * repositories with skewed timestamps, or different patches with
   equal timestamps will confuse darcs2git.
 * does not respect file modes or time zones.
 * too slow. See source code for instructions to speed it up.
 * probably doesn't work on partial repositories

Report new bugs to hanwen@xs4all.nl

LICENSE

Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>.
Distributed under terms of the GNU General Public License
This program comes with NO WARRANTY.
''' + '\n')

    p = MyOP ()

    p.usage='''darcs2git [OPTIONS] DARCS-REPO'''
    p.description='''Convert darcs repo to git.'''

    def update_map (option, opt, value, parser):
        # Callback for --authors: read EMAIL=NAME lines from VALUE.
        author_file = open (value)
        try:
            for l in author_file:
                l = l.strip ()
                if not l:
                    continue
                if '=' not in l:
                    raise optparse.OptionValueError (
                        '%s: expected EMAIL=NAME, got %r' % (value, l))

                # Split on the first '=' only: names may contain '='.
                (mail, name) = l.split ('=', 1)
                mail_to_name_dict[mail.strip ()] = name.strip ()
        finally:
            author_file.close ()

    p.add_option ('-a', '--authors', action='callback',
                  callback=update_map,
                  type='string',
                  nargs=1,
                  help='read a text file, containing EMAIL=NAME lines')

    p.add_option ('--checkpoint-frequency', action='store',
                  dest='checkpoint_frequency',
                  type='int',
                  default=0,
                  help='how often should the git importer be synced?\n'
                  'Default is 0 (no limit)'
                  )

    p.add_option ('-d', '--destination', action='store',
                  type='string',
                  default='',
                  dest='target_git_repo',
                  help='where to put the resulting Git repo.')

    p.add_option ('--verbose', action='store_true',
                  dest='verbose',
                  default=False,
                  help='show commands as they are invoked')

    p.add_option ('--history-window', action='store',
                  dest='history_window',
                  type='int',
                  default=0,
                  help='Look back this many patches as conflict ancestors.\n'
                  'Default is 0 (no limit)')

    p.add_option ('--debug', action='store_true',
                  dest='debug',
                  default=False,
                  help="""add patch numbers to commit messages;
                  don't clean conversion repo;
                  test end result.""")

    options, args = p.parse_args ()
    if not args:
        p.print_help ()
        sys.exit (2)

    # urlparse () always returns a 6-tuple, so testing its length can
    # never reject anything.  A non-empty network location (element 1)
    # is what marks a remote repository.
    if urlparse.urlparse (args[0])[1]:
        raise NotImplementedError ("We support local DARCS repos only.")

    version_pipe = os.popen ("git --version", "r")
    try:
        version_string = version_pipe.read ().strip ().split (" ")[-1]
    finally:
        version_pipe.close ()

    git_version = distutils.version.LooseVersion (version_string)
    ideal_version = distutils.version.LooseVersion ("1.5.0")
    if git_version < ideal_version:
        raise RuntimeError ("You need git >= 1.5.0 for this.")

    options.basename = os.path.basename (os.path.normpath (args[0])).replace ('.darcs', '')
    if not options.target_git_repo:
        options.target_git_repo = options.basename + '.git'

    if options.debug:
        name = options.target_git_repo.replace ('.git', '.log')
        if name == options.target_git_repo:
            # No '.git' in the target name: append instead, so that
            # the log never overwrites the repository path.
            name += '.log'

        progress ("Shell log to %s" % name)
        log_file = open (name, 'w')

    return (options, args)
def read_pipe (cmd, ignore_errors=False):
    """Run CMD through the shell and return what it wrote to stdout.

    The command is echoed through progress () when --verbose is set.
    Raises CommandFailed when closing the pipe reports a non-zero
    status, unless IGNORE_ERRORS is true.
    """
    if options.verbose:
        progress ('pipe %s' % cmd)

    stream = os.popen (cmd)
    output = stream.read ()

    # close () returns None on success and the exit status otherwise.
    status = stream.close ()
    if status and not ignore_errors:
        raise CommandFailed ("Pipe failed: %s" % cmd)

    return output
def system (c, ignore_error=0, timed=0):
    """Run the shell command C.

    With TIMED, the command is prefixed with `time '.  The command is
    echoed through progress () when --verbose is set and appended to
    the shell log when one is open.  Raises CommandFailed on a
    non-zero exit status, unless IGNORE_ERROR is true.
    """
    command = c
    if timed:
        command = "time " + command

    if options.verbose:
        progress (command)

    if log_file:
        log_file.write ('%s\n' % command)
        log_file.flush ()

    status = os.system (command)
    if ignore_error or not status:
        return

    raise CommandFailed ("Command failed: %s" % command)
def darcs_date_to_git (x):
    """Convert the darcs patch date X to seconds since the epoch.

    X has the form 'YYYYMMDDhhmmss'; the result is a decimal string,
    as wanted by git fast-import.

    The date is interpreted as UTC (darcs records patch dates in UTC
    -- NOTE(review): confirm against the darcs version in use).
    time.mktime () would read it as local time and shift every commit
    by the UTC offset of the converting machine.
    """
    import calendar

    t = time.strptime (x, '%Y%m%d%H%M%S')
    return '%d' % calendar.timegm (t)
def darcs_timezone (x):
    """Return the git time zone offset for the darcs local date X.

    todo: the offset is not derived from X yet; "+0100" is always
    returned.

    X used to be parsed with time.strptime () and the result thrown
    away.  strptime's %Z only accepts the zone names of the machine
    running the conversion (plus UTC and GMT), so a date recorded in
    any other zone raised ValueError.  The parse is dropped until its
    result is actually used.
    """
    return "+0100"
208 ################################################################
209 # darcs
class DarcsConversionRepo:
    """Representation of a Darcs repo.

    The repo is thought to be ordered, and supports methods for
    going back (obliterate) and forward (pull).

    Attributes:
      dir               absolute path of the scratch repository
      patches           the DarcsPatch objects of the source repo,
                        indexed by patch number
      _current_number   number of the patch the repo was last moved to
      _is_valid         false once a command has failed and the repo
                        must be rebuilt from scratch
      _inventory_dict   short id -> patch for the patches in the repo;
                        None means "reread from disk"
    """

    def __init__ (self, dir, patches):
        self.dir = os.path.abspath (dir)
        self.patches = patches
        self._current_number = -1
        # Note: -1 is truthy, so a repo counts as valid until a
        # command fails.
        self._is_valid = -1
        self._inventory_dict = None

        self._short_id_dict = dict ((p.short_id (), p) for p in patches)

    def __del__ (self):
        # Remove the scratch repo, unless --debug wants it kept.
        if not options.debug:
            system ('rm -fr %s' % self.dir)

    def is_contiguous (self):
        """True if the repo holds exactly patches 0 .. _current_number."""
        return (len (self.inventory_dict ()) == self._current_number + 1
                and self.contains_contiguous (self._current_number))

    def contains_contiguous (self, num):
        """True if the repo is usable and holds all of patches 0 .. NUM."""
        if not self._is_valid:
            return False

        darcs_dir = self.dir + '/_darcs'
        if not os.path.exists (darcs_dir):
            return False

        for p in self.patches[:num + 1]:
            if not self.has_patch (p):
                return False

        return True

    def has_patch (self, p):
        """True if patch P is listed in the repo's inventory."""
        assert self._is_valid

        return p.short_id () in self.inventory_dict ()

    def pristine_tree (self):
        """Return the path of the repo's pristine tree."""
        return self.dir + '/_darcs/pristine'

    def go_back_to (self, dest):
        """Obliterate the newest patches so that patch number DEST is
        the last one left."""

        # at 4, len = 5, go to 2: count == 2
        count = len (self.inventory_dict ()) - dest - 1

        assert self._is_valid
        assert count > 0

        self.checkout ()

        progress ('Rewinding %d patches' % count)
        system ('cd %(dir)s && echo ay|darcs obliterate --ignore-times --last %(count)d'
                % {'dir': self.dir, 'count': count})

        # Keep the cached inventory in step with the obliterate.
        d = self.inventory_dict ()
        for p in self.patches[dest + 1:self._current_number + 1]:
            d.pop (p.short_id (), None)

        self._current_number = dest

    def clean (self):
        """Delete the scratch repo from disk."""
        system ('rm -rf %s' % self.dir)

    def checkout (self):
        """Copy the pristine tree over the working directory."""
        system ('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/' % {'dir': self.dir})

    def pull (self, patch):
        """Pull PATCH from its source repo, selected by hash."""
        args = {'dir': self.dir,
                'id': patch.attributes['hash'],
                'source_repo': patch.dir}

        progress ('Pull patch %d' % patch.number)
        system ('cd %(dir)s && darcs pull --ignore-times --quiet --all --match "hash %(id)s" %(source_repo)s ' % args)

        self._current_number = patch.number

        ## must reread: the pull may have pulled in others.
        self._inventory_dict = None

    def go_forward_to (self, num):
        """Pull every patch of 0 .. NUM that the repo lacks, in one
        darcs invocation."""
        d = self.inventory_dict ()

        pull_me = []

        ## ugh
        for p in self.patches[0:num + 1]:
            if p.short_id () not in d:
                pull_me.append (p)
                d[p.short_id ()] = p

        if not pull_me:
            # An empty --match expression makes darcs fail.
            return

        pull_str = ' || '.join (['hash %s' % p.id () for p in pull_me])
        args = {'dir': self.dir,
                'pull_str': pull_str,
                'src': self.patches[0].dir}

        progress ('Pulling %d patches to go to %d' % (len (pull_me), num))
        system ('darcs pull --all --repo %(dir)s --match "%(pull_str)s" %(src)s' % args)

    def create_fresh (self):
        """Replace the scratch repo by a newly initialised, empty one."""
        system ('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
                % {'dir': self.dir})
        self._is_valid = True
        self._current_number = -1
        self._inventory_dict = {}

    def inventory (self):
        """Return the concatenated text of _darcs/inventory and all
        files in _darcs/inventories."""
        darcs_dir = self.dir + '/_darcs'
        chunks = []
        for name in [darcs_dir + '/inventory'] + glob.glob (darcs_dir + '/inventories/*'):
            f = open (name)
            try:
                chunks.append (f.read ())
            finally:
                f.close ()

        return ''.join (chunks)

    def inventory_dict (self):
        """Return short id -> patch for the patches in the repo,
        reading the inventory files when no cached copy exists.

        Raises KeyError when the inventory names a patch that is not
        in self.patches.
        """
        if not isinstance (self._inventory_dict, dict):
            # Build into a local first, so that a failed lookup does
            # not leave a half-filled cache behind.
            found = {}
            for m in re.finditer (r'\n([^*\n]+\*[*-][0-9]+)', self.inventory ()):
                found[m.group (1)] = self._short_id_dict[m.group (1)]

            self._inventory_dict = found

        return self._inventory_dict

    def start_at (self, num):
        """Move the repo to NUM.

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'
        """
        progress ('Starting afresh at %d' % num)

        self.create_fresh ()
        dir = self.dir
        if log_file:
            # The trailing newline keeps the next logged command off
            # this comment line.
            log_file.write ("# messing with _darcs/inventory\n")

        iv = open (dir + '/_darcs/inventory', 'w')
        try:
            for p in self.patches[:num + 1]:
                os.link (p.filename (), dir + '/_darcs/patches/' + os.path.basename (p.filename ()))
                iv.write (p.header ())
                self._inventory_dict[p.short_id ()] = p
        finally:
            iv.close ()

        system ('darcs repair --repo %(dir)s --quiet' % {'dir': dir})
        self.checkout ()
        self._current_number = num
        self._is_valid = True

    def go_to (self, dest):
        """Bring the repo to patch number DEST by the cheapest of:
        doing nothing, rewinding, pulling forward, or rebuilding."""
        contiguous = self.is_contiguous ()

        if not self._is_valid:
            self.start_at (dest)
        elif dest == self._current_number and contiguous:
            pass
        elif self.contains_contiguous (dest):
            self.go_back_to (dest)
        elif dest - len (self.inventory_dict ()) < dest // 100:
            # Presumably: pulling is only worth it when few patches
            # (under 1% of DEST) are missing.
            self.go_forward_to (dest)
        else:
            self.start_at (dest)

    def go_from_to (self, from_patch, to_patch):
        """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise
        PullConflict if conflict is detected
        """
        progress ('Trying %s -> %s' % (from_patch, to_patch))

        if from_patch:
            self.go_to (from_patch.number)
        else:
            self.create_fresh ()

        try:
            self.pull (to_patch)
            output = read_pipe ('cd %(dir)s && echo y|darcs resolve' % {'dir': self.dir})
        except CommandFailed:
            # State of the repo is unknown now; force a rebuild.
            self._is_valid = False
            raise PullConflict ()

        if 'No conflicts to resolve' not in output:
            raise PullConflict ()
class DarcsPatch:
    """One patch of the source repo, built from a <patch> element of
    `darcs changes --xml'.

    Attributes set by the constructor:
      xml          the <patch> DOM element
      dir          path of the repo the patch lives in
      number       position in the patch list (-1 until assigned)
      attributes   the XML attributes of the element, as a dict
      author_name, author_mail, message, date
                   values preformatted for git fast-import
    """

    def __repr__ (self):
        return 'patch %d' % self.number

    def __init__ (self, xml, dir):
        self.xml = xml
        self.dir = dir
        self.number = -1
        self.attributes = {}
        self._contents = None
        for (nm, value) in xml.attributes.items ():
            self.attributes[nm] = value

        # fixme: ugh attributes vs. methods.
        self.extract_author ()
        self.extract_message ()
        self.extract_time ()

    def id (self):
        """Return the patch hash."""
        return self.attributes['hash']

    def short_id (self):
        """Return AUTHOR*[*-]HASHPREFIX, the form matched against the
        inventory; '-' marks an inverted patch."""
        inv = '*'
        if self.attributes['inverted'] == 'True':
            inv = '-'

        return '%s*%s%s' % (self.attributes['author'], inv, self.attributes['hash'].split ('-')[0])

    def filename (self):
        """Return the path of the patch file in the source repo."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents (self):
        """Return the gunzipped patch file, read once and then cached."""
        if self._contents is None:
            f = gzip.open (self.filename ())
            try:
                self._contents = f.read ()
            finally:
                f.close ()

        return self._contents

    def header (self):
        """Return the leading part of the patch file up to and
        including the closing ']', terminated by a newline.

        start_at () writes this text into the inventory.
        """
        lines = self.contents ().split ('\n')

        name = lines[0]
        committer = lines[1] + '\n'
        committer = re.sub ('] {\n$', ']\n', committer)
        committer = re.sub ('] *\n$', ']\n', committer)
        comment = ''
        if not committer.endswith (']\n'):
            # The header continues with a long comment, ended by a
            # line starting with ']'.
            for l in lines[2:]:
                # startswith () instead of l[0]: an empty line must
                # not raise IndexError.
                if l.startswith (']'):
                    comment += ']\n'
                    break
                comment += l + '\n'

        header = name + '\n' + committer
        if comment:
            header += comment

        assert header[-1] == '\n'
        return header

    def extract_author (self):
        """Split the author attribute into author_name / author_mail.

        For a bare address the name comes from mail_to_name_dict, or
        else is the part before the '@'.
        """
        mail = self.attributes['author']
        name = ''
        m = re.search ("^(.*) <(.*)>$", mail)

        if m:
            name = m.group (1)
            mail = m.group (2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split ('@')[0]

        self.author_name = name
        self.author_mail = mail

    def extract_time (self):
        """Set self.date to 'SECONDS ZONE'."""
        self.date = darcs_date_to_git (self.attributes['date']) + ' ' + darcs_timezone (self.attributes['local_date'])

    def name (self):
        """Return the text of the <name> element, or '(no comment)'
        when the element is missing or empty."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName ('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            pass
        return patch_name

    def extract_message (self):
        """Set self.message to 'NAME\\n\\nCOMMENT'; the name of an
        inverted patch is prefixed with 'UNDO: '."""
        patch_name = self.name ()
        comment_elts = self.xml.getElementsByTagName ('comment')
        comment = ''
        # An empty <comment/> has no child nodes to read.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        if self.attributes['inverted'] == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = '%s\n\n%s' % (patch_name, comment)

    def tag_name (self):
        """Return the tag name of a 'TAG ...' patch, with whitespace
        and colons replaced by '_'; '' for any other patch."""
        patch_name = self.name ()
        if patch_name.startswith ("TAG "):
            return re.sub (r'[\s:]', '_', patch_name[4:])
        return ''
def get_darcs_patches (darcs_repo):
    """Return the patches of DARCS_REPO as a list of DarcsPatch
    objects, in the order of `darcs changes --reverse', each numbered
    with its index in the list."""
    progress ('reading patches.')

    changes = read_pipe ('darcs changes --xml --reverse --repo ' + darcs_repo)
    document = xml.dom.minidom.parseString (changes)

    patches = []
    for element in document.documentElement.getElementsByTagName ('patch'):
        patch = DarcsPatch (element, darcs_repo)
        patch.number = len (patches)
        patches.append (patch)

    return patches
536 ################################################################
537 # GIT export
class GitCommit:
    """Node of the exported commit graph: a darcs patch, the commit
    it was attached to, and its distance from the root."""

    def __init__ (self, parent, darcs_patch):
        self.parent = parent
        self.darcs_patch = darcs_patch

        # A commit without parent is a root at depth 0.
        self.depth = 0
        if parent:
            self.depth = parent.depth + 1

    def number (self):
        """Return the number of the darcs patch behind this commit."""
        return self.darcs_patch.number

    def parent_patch (self):
        """Return the darcs patch of the parent commit, or None for a
        root commit."""
        if not self.parent:
            return None
        return self.parent.darcs_patch
def common_ancestor (a, b):
    """Return the deepest commit that is an ancestor of (or equal to)
    both A and B, or None when they share no ancestor.

    A and B need `parent' and `depth' attributes, as GitCommit has.
    """
    # First bring the deeper of the two up to the depth of the other.
    while a.depth != b.depth:
        if a.depth < b.depth:
            b = b.parent
        else:
            a = a.parent

    # Then climb in lockstep until the paths meet.
    while a and b:
        if a == b:
            return a

        a, b = a.parent, b.parent

    return None
def export_checkpoint (gfi):
    """Write a fast-import `checkpoint' command to the stream GFI."""
    command = 'checkpoint\n\n'
    gfi.write (command)
def export_tree (tree, gfi):
    """Write the files below the directory TREE to the fast-import
    stream GFI, as the complete content of the commit being written.

    Emits `deleteall', then one inline `M 644' entry per file with
    its path relative to TREE, then the blank line closing the
    command.  File modes are not preserved (every file is 644).
    """
    tree = os.path.normpath (tree)
    prefix = tree + '/'
    gfi.write ('deleteall\n')
    for (root, dirs, files) in os.walk (tree):
        for f in files:
            rf = os.path.normpath (os.path.join (root, f))
            fh = open (rf)
            try:
                s = fh.read ()
            finally:
                fh.close ()

            # Cut off the leading TREE/ only.  str.replace () would
            # also delete later occurrences of it inside the path.
            if rf.startswith (prefix):
                rf = rf[len (prefix):]

            gfi.write ('M 644 inline %s\n' % rf)
            gfi.write ('data %d\n%s\n' % (len (s), s))

    # After the last file: a blank line in between would end the
    # commit command early.
    gfi.write ('\n')
def export_commit (repo, patch, last_patch, gfi):
    """Write PATCH to the fast-import stream GFI as a commit on its
    own temporary branch refs/heads/darcstmpN, marked :N+1.

    Parents are the pending patches that REPO now contains (they are
    removed from pending_patches); when there are none, LAST_PATCH is
    the parent, provided it was exported.  The commit content is the
    pristine tree of REPO.  PATCH is then recorded in pending_patches
    and git_commits.
    """
    def utf8 (s):
        # On Python 2 minidom returns unicode objects.  fast-import
        # wants bytes and a byte count, and writing non-ASCII unicode
        # to the pipe raises UnicodeEncodeError.
        if not isinstance (s, str) and hasattr (s, 'encode'):
            return s.encode ('utf-8')
        return s

    gfi.write ('commit refs/heads/darcstmp%d\n' % patch.number)
    gfi.write ('mark :%d\n' % (patch.number + 1))
    gfi.write ('committer %s <%s> %s\n' % (utf8 (patch.author_name),
                                           utf8 (patch.author_mail),
                                           utf8 (patch.date)))

    msg = patch.message
    if options.debug:
        msg += '\n\n#%d\n' % patch.number

    msg = utf8 (msg)
    gfi.write ('data %d\n%s\n' % (len (msg), msg))

    mergers = []
    # Iterate over a copy: entries are deleted inside the loop.
    for (n, p) in list (pending_patches.items ()):
        if repo.has_patch (p):
            mergers.append (n)
            del pending_patches[n]

    if (last_patch
        and not mergers
        and last_patch.number in git_commits):
        mergers = [last_patch.number]

    if mergers:
        gfi.write ('from :%d\n' % (mergers[0] + 1))
        for m in mergers[1:]:
            gfi.write ('merge :%d\n' % (m + 1))

    pending_patches[patch.number] = patch
    export_tree (repo.pristine_tree (), gfi)

    n = -1
    if last_patch:
        n = last_patch.number
    git_commits[patch.number] = GitCommit (git_commits.get (n, None),
                                           patch)
def export_pending (gfi):
    """Create the final branches on the fast-import stream GFI.

    With a single pending patch, refs/heads/master is pointed at it.
    With several, each gets its own branch refs/heads/masterN, and a
    merge commit 'tie together' on refs/heads/master joins them.
    With none (nothing was exported), nothing is written.
    """
    def utf8 (s):
        # See export_commit: keep unicode away from the pipe.
        if not isinstance (s, str) and hasattr (s, 'encode'):
            return s.encode ('utf-8')
        return s

    heads = list (pending_patches.items ())
    if not heads:
        progress ("No patches exported, not creating branch master")
        return

    if len (heads) == 1:
        gfi.write ('reset refs/heads/master\n')
        gfi.write ('from :%d\n\n' % (heads[0][1].number + 1))

        progress ("Creating branch master")
        return

    for (n, p) in heads:
        gfi.write ('reset refs/heads/master%d\n' % n)
        gfi.write ('from :%d\n\n' % (n + 1))

        progress ("Creating branch master%d" % n)

    patches = [p for (n, p) in heads]
    patch = patches[0]
    gfi.write ('commit refs/heads/master\n')
    gfi.write ('committer %s <%s> %s\n' % (utf8 (patch.author_name),
                                           utf8 (patch.author_mail),
                                           utf8 (patch.date)))
    msg = 'tie together'
    gfi.write ('data %d\n%s\n' % (len (msg), msg))
    gfi.write ('from :%d\n' % (patch.number + 1))
    for p in patches[1:]:
        gfi.write ('merge :%d\n' % (p.number + 1))
    gfi.write ('\n')
def export_tag (patch, gfi):
    """Write an annotated tag for the darcs tag PATCH to the
    fast-import stream GFI.

    The tag is named patch.tag_name () and points at mark
    :patch.number+1, the commit exported for PATCH.
    """
    def utf8 (s):
        # On Python 2 minidom returns unicode objects.  fast-import
        # wants bytes and a byte count, and writing non-ASCII unicode
        # to the pipe raises UnicodeEncodeError.
        if not isinstance (s, str) and hasattr (s, 'encode'):
            return s.encode ('utf-8')
        return s

    msg = utf8 (patch.message)

    gfi.write ('tag %s\n' % utf8 (patch.tag_name ()))
    gfi.write ('from :%d\n' % (patch.number + 1))
    gfi.write ('tagger %s <%s> %s\n' % (utf8 (patch.author_name),
                                        utf8 (patch.author_mail),
                                        utf8 (patch.date)))
    gfi.write ('data %d\n%s\n' % (len (msg), msg))
668 ################################################################
669 # main.
def test_conversion (darcs_repo, git_repo):
    """Check that a checkout of GIT_REPO equals the pristine tree of
    DARCS_REPO.

    GIT_REPO is cloned into a temporary directory and compared with
    `diff -urN'.  The check is skipped, with a message, when
    DARCS_REPO has no _darcs/pristine directory.

    Raises RuntimeError when the trees differ; a shortened diff is
    shown through progress () first.
    """
    pristine = '%(darcs_repo)s/_darcs/pristine' % locals ()
    if not os.path.exists (pristine):
        progress ("darcs repository does not contain pristine tree?!")
        return

    gd = options.basename + '.checkouttmp.git'
    system ('rm -rf %(gd)s && git clone %(git_repo)s %(gd)s' % locals ())
    try:
        diff_cmd = 'diff --exclude .git -urN %(gd)s %(pristine)s' % locals ()
        # diff exits non-zero when the trees differ; that case is
        # handled below, so errors are ignored here.
        diff = read_pipe (diff_cmd, ignore_errors=True)
    finally:
        system ('rm -rf %(gd)s' % locals ())

    if diff:
        if len (diff) > 1024:
            # Show only the head and the tail of a long diff.
            diff = diff[:512] + '\n...\n' + diff[-512:]

        progress ("Conversion introduced changes: %s" % diff)
        # A string cannot be raised on Python >= 2.6; use a real
        # exception.
        raise RuntimeError ("Conversion introduced changes")
    else:
        progress ("Checkout matches pristine darcs tree.")
def main ():
    """Convert the darcs repository named on the command line.

    Creates a fresh bare git repository (an existing target is
    removed) and feeds `git fast-import'.  For each darcs patch, in
    order, a parent is searched onto which the patch pulls without
    conflict in the scratch repo; the resulting pristine tree is then
    exported as a commit.  Finally the branches are created, the
    temporary refs removed and the result compared with the darcs
    pristine tree.

    Raises CommandFailed when a shell command or git fast-import
    fails.
    """
    (options, args) = get_cli_options ()

    darcs_repo = os.path.abspath (args[0])
    git_repo = os.path.abspath (options.target_git_repo)

    if os.path.exists (git_repo):
        system ('rm -rf %(git_repo)s' % locals ())

    system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals ())
    # `git config' and `git fast-import' are the current spellings of
    # `git repo-config' and `git-fast-import'.
    system ('git --git-dir %(git_repo)s config core.logAllRefUpdates false' % locals ())

    os.environ['GIT_DIR'] = git_repo

    quiet = ' --quiet'
    if options.verbose:
        quiet = ' '

    gfi = os.popen ('git fast-import %s' % quiet, 'w')

    patches = get_darcs_patches (darcs_repo)
    conv_repo = DarcsConversionRepo (options.basename + ".tmpdarcs", patches)
    conv_repo.start_at (-1)

    for p in patches:

        parent_patch = None
        parent_number = -1

        # Candidate parents: the common ancestors of every pair of
        # pending patches.  Keying by patch number drops the many
        # duplicates, so no candidate is tried twice.  Pairs without
        # a common ancestor give None and are skipped.
        heads = list (pending_patches.values ())
        ancestors = {}
        for v in heads:
            for w in heads:
                a = common_ancestor (git_commits[v.number], git_commits[w.number])
                if a is not None:
                    ancestors[a.darcs_patch.number] = a

        # Newest candidate first.
        for number in sorted (ancestors, reverse=True):
            q = ancestors[number].darcs_patch
            try:
                conv_repo.go_from_to (q, p)

                parent_patch = q
                parent_number = q.number
                progress ('Found existing common parent as predecessor')
                break

            except PullConflict:
                pass

        ## no branches found where we could attach.
        ## try previous commits one by one.
        if not parent_patch:
            parent_number = p.number - 2
            while 1:
                if parent_number >= 0:
                    parent_patch = patches[parent_number]

                try:
                    conv_repo.go_from_to (parent_patch, p)
                    break
                except PullConflict:

                    ## simplistic, may not be enough.
                    progress ('conflict, going one back')
                    parent_number -= 1

                    if parent_number < 0:
                        break

                    if (options.history_window
                        and parent_number < p.number - options.history_window):

                        # Outside the window: give up on this patch.
                        parent_number = -2
                        break

        if parent_number >= 0 or p.number == 0:
            progress ('Export %d -> %d (total %d)' % (parent_number,
                                                      p.number, len (patches)))
            export_commit (conv_repo, p, parent_patch, gfi)
            if p.tag_name ():
                export_tag (p, gfi)

            if options.checkpoint_frequency and p.number % options.checkpoint_frequency == 0:
                export_checkpoint (gfi)
        else:
            progress ("Can't import patch %d, need conflict resolution patch?" % p.number)

    export_pending (gfi)

    # close () returns the exit status of git fast-import, if non-zero.
    if gfi.close ():
        raise CommandFailed ("git fast-import failed")

    # The per-patch branches were only scaffolding for the import.
    for f in glob.glob ('%(git_repo)s/refs/heads/darcstmp*' % locals ()):
        os.unlink (f)

    test_conversion (darcs_repo, git_repo)

    if not options.debug:
        conv_repo.clean ()
# Run the conversion only when executed as a script, so that the
# module can be imported without side effects.
if __name__ == '__main__':
    main ()