Breaks on FC7t4
[darcs2git.git] / darcs2git.py
bloba793680b0d900aadf844c724360a6ae1193c9415
1 #! /usr/bin/python
3 """
5 darcs2git -- Darcs to git converter.
7 Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 """
26 # TODO:
28 # - time zones
30 # - file modes
32 # - use binary search to find from-patch in case of conflict.
35 import urlparse
36 import distutils.version
37 import glob
38 import os
39 import sys
40 import time
41 import xml.dom.minidom
42 import re
43 import gdbm as dbmodule
44 import gzip
45 import optparse
46 from email.utils import parsedate_tz
48 ################################################################
49 # globals
# Both are filled in by get_cli_options (): `options' holds the parsed
# command line, `log_file' is the shell log that is opened with --debug.
options = None
log_file = None

# EMAIL -> NAME, loaded from the file passed with --authors.
mail_to_name_dict = {}

# patch number -> DarcsPatch for exported patches that no later
# commit has merged yet.
pending_patches = {}

# patch number -> GitCommit for every patch exported so far.
git_commits = {}

# NOTE(review): not referenced anywhere in the visible source.
used_tags = {}
59 ################################################################
60 # utils
class PullConflict (Exception):
    """Raised when pulling a patch into the conversion repo conflicts."""
class CommandFailed (Exception):
    """Raised when a shell command or pipe exits with a non-zero status."""
def progress (s):
    """Write the status message S, followed by a newline, to stderr."""
    line = s + '\n'
    sys.stderr.write (line)
def get_cli_options ():
    """Parse the command line and return (options, args).

    Side effects: stores the parsed options in the module global
    `options', fills `mail_to_name_dict' from the --authors file and,
    with --debug, opens the shell log in the module global `log_file'.

    Exits with status 2 (after printing the help) when no repository
    argument is given.  Raises NotImplementedError for a repository
    given as a URL, and RuntimeError when git is missing or older
    than 1.5.0.
    """
    class MyOP(optparse.OptionParser):
        def print_help(self):
            optparse.OptionParser.print_help (self)
            # Same output as the former `print' statement: the text
            # followed by one newline.
            sys.stdout.write ('''
DESCRIPTION

This tool is a conversion utility for Darcs repositories, importing
them in chronological order. It requires a Git version that has
git-fast-import. It does not support incremental updating.

BUGS

* repositories with skewed timestamps, or different patches with
  equal timestamps will confuse darcs2git.
* does not respect file modes or time zones.
* too slow. See source code for instructions to speed it up.
* probably doesn\'t work on partial repositories

Report new bugs to hanwen@xs4all.nl

LICENSE

Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>.
Distributed under terms of the GNU General Public License
This program comes with NO WARRANTY.
''' + '\n')

    p = MyOP ()

    p.usage='''darcs2git [OPTIONS] DARCS-REPO'''
    p.description='''Convert darcs repo to git.'''

    def update_map (option, opt, value, parser):
        # optparse callback for --authors: VALUE is the path of a
        # file with EMAIL=NAME lines.
        f = open (value)
        try:
            for l in f:
                l = l.strip ()
                if not l:
                    # tolerate blank lines instead of crashing.
                    continue
                if '=' not in l:
                    raise optparse.OptionValueError (
                        "%s: expected EMAIL=NAME, got %r" % (value, l))
                # split on the first '=' only; a name may contain '='.
                (mail, name) = l.split ('=', 1)
                mail_to_name_dict[mail] = name
        finally:
            f.close ()

    p.add_option ('-a', '--authors', action='callback',
                  callback=update_map,
                  type='string',
                  nargs=1,
                  help='read a text file, containing EMAIL=NAME lines')

    p.add_option ('--checkpoint-frequency', action='store',
                  dest='checkpoint_frequency',
                  type='int',
                  default=0,
                  help='how often should the git importer be synced?\n'
                  'Default is 0 (no limit)'
                  )

    p.add_option ('-d', '--destination', action='store',
                  type='string',
                  default='',
                  dest='target_git_repo',
                  help='where to put the resulting Git repo.')

    p.add_option ('--verbose', action='store_true',
                  dest='verbose',
                  default=False,
                  help='show commands as they are invoked')

    p.add_option ('--history-window', action='store',
                  dest='history_window',
                  type='int',
                  default=0,
                  help='Look back this many patches as conflict ancestors.\n'
                  'Default is 0 (no limit)')

    p.add_option ('--debug', action='store_true',
                  dest='debug',
                  default=False,
                  help="""add patch numbers to commit messages;
don\'t clean conversion repo;
test end result.""")

    global options
    options, args = p.parse_args ()
    if not args:
        p.print_help ()
        sys.exit (2)

    # urlparse () always returns a 6-tuple, so testing its length can
    # never detect a URL.  Look at the scheme and network location.
    url_parts = urlparse.urlparse (args[0])
    if url_parts[0] and url_parts[1]:
        raise NotImplementedError ("We support local DARCS repos only.")

    # Pick the dotted number out of the `git --version' output rather
    # than the last word, which may be a vendor suffix.  No match
    # (e.g. git is not installed) is treated like a too-old git.
    version_output = os.popen ("git --version", "r").read ()
    version_match = re.search (r'(\d+(?:\.\d+)+)', version_output)
    ideal_version = distutils.version.LooseVersion ("1.5.0")
    if (not version_match
        or distutils.version.LooseVersion (version_match.group (1)) < ideal_version):
        raise RuntimeError ("You need git >= 1.5.0 for this.")

    options.basename = os.path.basename (os.path.normpath (args[0])).replace ('.darcs', '')
    if not options.target_git_repo:
        options.target_git_repo = options.basename + '.git'

    if options.debug:
        global log_file
        name = options.target_git_repo.replace ('.git', '.log')
        if name == options.target_git_repo:
            name += '.log'

        progress ("Shell log to %s" % name)
        log_file = open (name, 'w')

    return (options, args)
def read_pipe (cmd, ignore_errors=False):
    """Run the shell command CMD and return everything it prints.

    With --verbose the command is echoed through progress () first.
    Raises CommandFailed when the pipe closes with a non-zero status,
    unless IGNORE_ERRORS is true.
    """
    if options.verbose:
        progress ('pipe %s' % cmd)

    pipe = os.popen (cmd)
    output = pipe.read ()

    # close () yields the exit status, or None on success.
    status = pipe.close ()
    if status and not ignore_errors:
        raise CommandFailed ("Pipe failed: %s" % cmd)

    return output
def system (c, ignore_error=0, timed=0):
    """Run the shell command C through os.system ().

    TIMED prefixes the command with `time'.  The command is echoed
    with --verbose and appended to the shell log when one is open.
    Raises CommandFailed on a non-zero status unless IGNORE_ERROR is
    true.
    """
    if timed:
        c = "time " + c

    if options.verbose:
        progress (c)

    if log_file:
        log_file.write ('%s\n' % c)
        # flush, so the log is complete even if the command hangs or dies.
        log_file.flush ()

    status = os.system (c)
    if status and not ignore_error:
        raise CommandFailed ("Command failed: %s" % c)
def darcs_date_to_git (x):
    """Convert the darcs timestamp X (YYYYMMDDHHMMSS) to epoch seconds.

    Returns the seconds since the epoch as a decimal string.  The
    timestamp is interpreted as UTC: time.mktime () would read it as
    local time and shift every commit by the UTC offset of the machine
    running the conversion.
    """
    # local import: the file's import block does not provide calendar.
    import calendar

    t = time.strptime (x, '%Y%m%d%H%M%S')
    return '%d' % calendar.timegm (t)
def darcs_timezone (x) :
    """Return the UTC offset found in the date string X as +HHMM/-HHMM.

    Falls back to '+0000' when X cannot be parsed or carries no zone
    that parsedate_tz () understands (it returns no offset for names
    such as `CET'); the old code raised TypeError in that case.
    """
    parsed = parsedate_tz (x)
    offset = None
    if parsed:
        offset = parsed[9]
    if offset is None:
        return '+0000'

    # Split the magnitude and add the sign afterwards: flooring a
    # negative offset turned -0330 into -0430.
    sign = '+'
    if offset < 0:
        sign = '-'
    (hours, minutes) = divmod (abs (offset) // 60, 60)
    return '%s%02d%02d' % (sign, hours, minutes)
208 ################################################################
209 # darcs
class DarcsConversionRepo:
    """Representation of a Darcs repo.

    The repo is thought to be ordered, and supports methods for
    going back (obliterate) and forward (pull).

    State kept per instance:
      dir              absolute path of the scratch repository
      patches          the DarcsPatch list handed to the constructor
      _current_number  number of the patch the repo was last moved to
      _is_valid        false after a failed pull; the repo must then
                       be rebuilt with start_at ()
      _inventory_dict  short id -> patch cache of the inventory, or
                       None when it has to be re-read from disk
    """

    def __init__ (self, dir, patches):
        self.dir = os.path.abspath (dir)
        self.patches = patches
        self._current_number = -1
        self._is_valid = -1
        self._inventory_dict = None

        self._short_id_dict = dict ((p.short_id (), p) for p in patches)

    def __del__ (self):
        # keep the scratch repo around for inspection with --debug.
        if not options.debug:
            system ('rm -fr %s' % self.dir)

    def is_contiguous (self):
        """Return true if the repo holds exactly patches 0 .. current."""
        return (len (self.inventory_dict ()) == self._current_number+1
                and self.contains_contiguous (self._current_number))

    def contains_contiguous (self, num):
        """Return True if the repo is usable and has all patches 0 .. NUM."""
        if not self._is_valid:
            return False

        darcs_dir = self.dir + '/_darcs'
        if not os.path.exists (darcs_dir):
            return False

        for p in self.patches[:num + 1]:
            if not self.has_patch (p):
                return False

        return True

    def has_patch (self, p):
        """Return whether patch P is listed in the inventory."""
        assert self._is_valid

        return p.short_id () in self.inventory_dict ()

    def pristine_tree (self):
        """Return the path of the pristine tree inside the repo."""
        return self.dir + '/_darcs/pristine'

    def go_back_to (self, dest):
        """Obliterate the newest patches so that DEST becomes the last one."""

        # at 4, len = 5, go to 2: count == 2
        count = len (self.inventory_dict()) - dest - 1

        assert self._is_valid
        assert count > 0

        self.checkout ()

        progress ('Rewinding %d patches' % count)
        system ('cd %(dir)s && echo ay|darcs obliterate --ignore-times --last %(count)d'
                % {'dir': self.dir, 'count': count})

        # mirror the obliterate in the cached inventory.
        d = self.inventory_dict ()
        for p in self.patches[dest+1:self._current_number+1]:
            try:
                del d[p.short_id ()]
            except KeyError:
                pass

        self._current_number = dest

    def clean (self):
        """Remove the scratch repository from disk."""
        system ('rm -rf %s' % self.dir)

    def checkout (self):
        """Copy the pristine tree over the working directory with rsync."""
        system ('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/'
                % {'dir': self.dir})

    def pull (self, patch):
        """Pull PATCH, selected by hash, from its source repository."""
        progress ('Pull patch %d' % patch.number)
        system ('cd %(dir)s && darcs pull --ignore-times --quiet --all --match "hash %(id)s" %(source_repo)s '
                % {'dir': self.dir,
                   'id': patch.attributes['hash'],
                   'source_repo': patch.dir})

        self._current_number = patch.number

        ## must reread: the pull may have pulled in others.
        self._inventory_dict = None

    def go_forward_to (self, num):
        """Pull every patch in 0 .. NUM that the repo does not have yet."""
        d = self.inventory_dict ()

        pull_me = []

        ## ugh
        for p in self.patches[0:num+1]:
            if p.short_id () not in d:
                pull_me.append (p)
                d[p.short_id ()] = p

        pull_str = ' || '.join (['hash %s' % p.id () for p in pull_me])

        progress ('Pulling %d patches to go to %d' % (len (pull_me), num))
        system ('darcs pull --all --repo %(dir)s --match "%(pull_str)s" %(src)s'
                % {'dir': self.dir,
                   'pull_str': pull_str,
                   'src': self.patches[0].dir})

    def create_fresh (self):
        """Replace the scratch repository with a newly initialised one."""
        system ('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
                % {'dir': self.dir})
        self._is_valid = True
        self._current_number = -1
        self._inventory_dict = {}

    def inventory (self):
        """Return the concatenated text of all inventory files."""
        darcs_dir = self.dir + '/_darcs'
        chunks = []
        for f in [darcs_dir + '/inventory'] + glob.glob (darcs_dir + '/inventories/*'):
            # close each file explicitly rather than leaving it to
            # the garbage collector.
            handle = open (f)
            try:
                chunks.append (handle.read ())
            finally:
                handle.close ()
        return ''.join (chunks)

    def inventory_dict (self):
        """Return the short id -> patch mapping of the inventory.

        The mapping is parsed from the inventory files on first use
        and cached until pull () invalidates it.  Raises KeyError for
        an inventory entry that is not one of the known patches.
        """
        if not isinstance (self._inventory_dict, dict):
            self._inventory_dict = {}

            # every entry looks like AUTHOR**DATE (or AUTHOR*-DATE for
            # an inverted patch) at the start of a line.
            for m in re.finditer (r'\n([^*\n]+\*[*-][0-9]+)', self.inventory ()):
                self._inventory_dict[m.group (1)] = self._short_id_dict[m.group (1)]
        return self._inventory_dict

    def start_at (self, num):

        """Move the repo to NUM.

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'
        """
        progress ('Starting afresh at %d' % num)

        self.create_fresh ()
        dir = self.dir
        if log_file:
            # end the line, so the next logged command does not end
            # up behind this comment.
            log_file.write ("# messing with _darcs/inventory\n")

        iv = open (dir + '/_darcs/inventory', 'w')
        try:
            for p in self.patches[:num+1]:
                os.link (p.filename (), dir + '/_darcs/patches/' + os.path.basename (p.filename ()))
                iv.write (p.header ())
                self._inventory_dict[p.short_id ()] = p
        finally:
            iv.close ()

        system ('darcs repair --repo %(dir)s --quiet' % {'dir': dir})
        self.checkout ()
        self._current_number = num
        self._is_valid = True

    def go_to (self, dest):
        """Move the repo to patch DEST by rewinding, pulling or rebuilding."""
        contiguous = self.is_contiguous ()

        if not self._is_valid:
            self.start_at (dest)
        elif dest == self._current_number and contiguous:
            pass
        elif (self.contains_contiguous (dest)):
            self.go_back_to (dest)
        elif dest - len (self.inventory_dict ()) < dest // 100:
            # only a few patches are missing: pull them.
            self.go_forward_to (dest)
        else:
            self.start_at (dest)


    def go_from_to (self, from_patch, to_patch):

        """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise
        PullConflict if conflict is detected
        """

        progress ('Trying %s -> %s' % (from_patch, to_patch))

        if from_patch:
            self.go_to (from_patch.number)
        else:
            self.create_fresh ()

        try:
            self.pull (to_patch)
            success = 'No conflicts to resolve' in read_pipe ('cd %(dir)s && echo y|darcs resolve'
                                                             % {'dir': self.dir})
        except CommandFailed:
            # the repo is in an unknown state now; force a rebuild.
            self._is_valid = False
            raise PullConflict ()

        if not success:
            raise PullConflict ()
class DarcsPatch:
    """One patch from the `darcs changes --xml' listing.

    The attributes of the <patch> element are copied into the
    `attributes' dict; author_name, author_mail, message and date are
    derived from them by the constructor.  `number' stays -1 until the
    caller assigns the position of the patch.
    """

    def __repr__ (self):
        return 'patch %d' % self.number

    def __init__ (self, xml, dir):
        self.xml = xml
        self.dir = dir
        self.number = -1
        self.attributes = {}
        self._contents = None
        for (nm, value) in xml.attributes.items():
            self.attributes[nm] = value

        # fixme: ugh attributes vs. methods.
        self.extract_author ()
        self.extract_message ()
        self.extract_time ()

    def id (self):
        """Return the full hash of the patch."""
        return self.attributes['hash']

    def short_id (self):
        """Return the AUTHOR**DATE (AUTHOR*-DATE if inverted) inventory key."""
        inv = '*'
        if self.attributes['inverted'] == 'True':
            inv = '-'

        return '%s*%s%s' % (self.attributes['author'], inv, self.attributes['hash'].split ('-')[0])

    def filename (self):
        """Return the path of the patch file in the source repository."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents (self):
        """Return the decompressed patch file, reading it on first use."""
        if not isinstance (self._contents, str):
            f = gzip.open (self.filename ())
            try:
                self._contents = f.read ()
            finally:
                # don't leave one open handle behind per patch.
                f.close ()

        return self._contents

    def header (self):
        """Return the inventory entry of the patch, newline terminated.

        That is the name line, the author/date line and, for patches
        with a long comment, the comment lines up to the closing `]'.
        """
        lines = self.contents ().split ('\n')

        name = lines[0]
        committer = lines[1] + '\n'
        committer = re.sub ('] {\n$', ']\n', committer)
        committer = re.sub ('] *\n$', ']\n', committer)
        comment = ''
        if not committer.endswith (']\n'):
            for l in lines[2:]:
                # startswith () also copes with an empty line, where
                # l[0] raised IndexError.
                if l.startswith (']'):
                    comment += ']\n'
                    break
                comment += l + '\n'

        header = name + '\n' + committer
        if comment:
            header += comment

        assert header[-1] == '\n'
        return header

    def extract_author (self):
        """Set author_name and author_mail from the `author' attribute.

        `Name <mail>' is split; for a bare address the name comes from
        mail_to_name_dict or, failing that, from the part before `@'.
        """
        mail = self.attributes['author']
        name = ''
        m = re.search ("^(.*) <(.*)>$", mail)

        if m:
            name = m.group (1)
            mail = m.group (2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split ('@')[0]

        self.author_name = name
        self.author_mail = mail

    def extract_time (self):
        """Set `date' to `EPOCH-SECONDS ZONE-OFFSET'."""
        self.date = darcs_date_to_git (self.attributes['date']) + ' ' + darcs_timezone (self.attributes['local_date'])

    def name (self):
        """Return the text of the <name> element, or '(no comment)'."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName ('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            pass
        return patch_name

    def extract_message (self):
        """Set `message' to the patch name, a blank line and the comment."""
        patch_name = self.name ()
        comment_elts = self.xml.getElementsByTagName ('comment')
        comment = ''
        # an empty <comment/> element has no child nodes; treat it
        # like a missing comment instead of raising IndexError.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        if self.attributes['inverted'] == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = '%s\n\n%s' % (patch_name, comment)

    def tag_name (self):
        """Return the tag name for a `TAG ...' patch, otherwise ''.

        Whitespace and colons in the name are replaced by underscores.
        """
        patch_name = self.name ()
        if patch_name.startswith ("TAG "):
            tag = patch_name[4:]
            tag = re.sub (r'\s', '_', tag).strip ()
            tag = re.sub (r':', '_', tag).strip ()
            return tag
        return ''
def get_darcs_patches (darcs_repo):
    """Return the patches of DARCS_REPO as a list of DarcsPatch.

    The list follows the order of `darcs changes --reverse'; each
    patch gets its list index as `number'.
    """
    progress ('reading patches.')

    changes_xml = read_pipe ('darcs changes --xml --reverse --repo ' + darcs_repo)
    document = xml.dom.minidom.parseString (changes_xml)
    elements = document.documentElement.getElementsByTagName ('patch')

    patches = []
    for element in elements:
        patches.append (DarcsPatch (element, darcs_repo))

    for (number, patch) in enumerate (patches):
        patch.number = number

    return patches
536 ################################################################
537 # GIT export
class GitCommit:
    """A node in the commit graph that is being exported.

    Links a darcs patch to its parent commit; `depth' is the number
    of ancestors (0 for a commit without parent).
    """

    def __init__ (self, parent, darcs_patch):
        self.parent = parent
        self.darcs_patch = darcs_patch
        self.depth = 0
        if parent:
            self.depth = parent.depth + 1

    def number (self):
        """Return the number of the darcs patch behind this commit."""
        return self.darcs_patch.number

    def parent_patch (self):
        """Return the darcs patch of the parent commit, or None."""
        if not self.parent:
            return None
        return self.parent.darcs_patch
def common_ancestor (a, b):
    """Return the nearest commit that A and B both descend from.

    A commit counts as its own ancestor.  Returns None when the two
    parent chains never meet.
    """
    # first climb from the deeper commit until both are equally deep ...
    while a.depth != b.depth:
        if a.depth < b.depth:
            b = b.parent
        else:
            a = a.parent

    # ... then climb in lock step until the chains meet.
    while a and b:
        if a == b:
            return a
        (a, b) = (a.parent, b.parent)

    return None
def export_checkpoint (gfi):
    """Write a `checkpoint' command to the fast-import stream GFI."""
    command = 'checkpoint\n\n'
    gfi.write (command)
def export_tree (tree, gfi):
    """Write the files below TREE to the fast-import stream GFI.

    Emits `deleteall', then one inline `M 644' entry per file with
    its path relative to TREE, then the blank line that ends the
    commit.  Every file is exported with mode 644.
    """
    tree = os.path.normpath (tree)
    prefix = tree + '/'
    gfi.write ('deleteall\n')
    for (root, dirs, files) in os.walk (tree):
        for f in files:
            rf = os.path.normpath (os.path.join (root, f))
            handle = open (rf)
            try:
                s = handle.read ()
            finally:
                # a large tree would otherwise pile up open handles.
                handle.close ()

            # Strip the leading TREE only: str.replace () removed
            # every occurrence and mangled paths that repeat the
            # prefix further down.
            if rf.startswith (prefix):
                rf = rf[len (prefix):]

            gfi.write ('M 644 inline %s\n' % rf)
            gfi.write ('data %d\n%s\n' % (len (s), s))
    gfi.write ('\n')
def export_commit (repo, patch, last_patch, gfi):
    """Write PATCH as a commit to the fast-import stream GFI.

    The commit goes to its own branch refs/heads/darcstmpN and gets
    mark N+1.  Its parents are the pending patches that REPO already
    contains or, if there are none, LAST_PATCH when that was exported.
    Afterwards PATCH is registered in pending_patches and git_commits.
    """
    number = patch.number
    gfi.write ('commit refs/heads/darcstmp%d\n' % number)
    gfi.write ('mark :%d\n' % (number + 1))
    gfi.write ('committer %s <%s> %s\n' % (patch.author_name,
                                           patch.author_mail,
                                           patch.date))

    msg = patch.message
    if options.debug:
        msg += '\n\n#%d\n' % number

    # NOTE(review): len () counts characters; if the message is a
    # unicode string with non-ASCII text this may not be the byte
    # count fast-import expects -- TODO confirm.
    gfi.write ('data %d\n%s\n' % (len (msg), msg))

    # pending patches that are part of the repo state get merged by
    # this commit, and are no longer pending afterwards.
    mergers = [n for (n, p) in list (pending_patches.items ())
               if repo.has_patch (p)]
    for n in mergers:
        del pending_patches[n]

    if not mergers and last_patch and last_patch.number in git_commits:
        mergers = [last_patch.number]

    if mergers:
        gfi.write ('from :%d\n' % (mergers[0] + 1))
        for m in mergers[1:]:
            gfi.write ('merge :%d\n' % (m + 1))

    pending_patches[number] = patch
    export_tree (repo.pristine_tree (), gfi)

    n = -1
    if last_patch:
        n = last_patch.number
    parent_commit = git_commits.get (n, None)
    git_commits[number] = GitCommit (parent_commit, patch)
def export_pending (gfi):
    """Create the master branch from the patches that are still pending.

    With one pending patch, master is reset to its commit.  With
    several, each gets a branch masterN and master becomes a
    `tie together' commit that merges them all.  Nothing is written
    when no patch is pending (empty repository, or nothing could be
    exported); the old code died with IndexError there.
    """
    pending = list (pending_patches.items ())
    if not pending:
        progress ("No pending patches, not creating branch master")
        return

    if len (pending) == 1:
        gfi.write ('reset refs/heads/master\n')
        gfi.write ('from :%d\n\n' % (pending[0][1].number+1))

        progress ("Creating branch master")
        return

    for (n, p) in pending:
        gfi.write ('reset refs/heads/master%d\n' % n)
        gfi.write ('from :%d\n\n' % (n+1))

        progress ("Creating branch master%d" % n)

    patches = [p for (n, p) in pending]
    # the first pending patch supplies the committer and date of the
    # merge commit.
    patch = patches[0]
    gfi.write ('commit refs/heads/master\n')
    gfi.write ('committer %s <%s> %s\n' % (patch.author_name,
                                           patch.author_mail,
                                           patch.date))
    msg = 'tie together'
    gfi.write ('data %d\n%s\n' % (len(msg), msg))
    gfi.write ('from :%d\n' % (patch.number + 1))
    for p in patches[1:]:
        gfi.write ('merge :%d\n' % (p.number + 1))
    gfi.write ('\n')
def export_tag (patch, gfi):
    """Write an annotated tag for PATCH to the fast-import stream GFI.

    The tag is named patch.tag_name (), points at mark N+1 and uses
    the author, date and message of the patch.
    """
    message = patch.message
    tagger = (patch.author_name, patch.author_mail, patch.date)

    gfi.write ('tag %s\n' % patch.tag_name ())
    gfi.write ('from :%d\n' % (patch.number + 1))
    gfi.write ('tagger %s <%s> %s\n' % tagger)
    gfi.write ('data %d\n%s\n' % (len (message), message))
668 ################################################################
669 # main.
def test_conversion (darcs_repo, git_repo):
    """Compare a checkout of GIT_REPO with the pristine tree of DARCS_REPO.

    Clones GIT_REPO into a temporary directory, diffs it against
    DARCS_REPO/_darcs/pristine and reports the outcome through
    progress ().  Does nothing but report when the darcs repository
    has no pristine tree.
    """
    pristine = '%(darcs_repo)s/_darcs/pristine' % locals ()
    if not os.path.exists (pristine):
        progress ("darcs repository does not contain pristine tree?!")
        return

    gd = options.basename + '.checkouttmp.git'
    system ('rm -rf %(gd)s && git clone %(git_repo)s %(gd)s' % locals ())
    diff_cmd = 'diff --exclude .git -urN %(gd)s %(pristine)s' % locals ()

    # The clone used to be removed before the diff ran, so the diff
    # never saw the checkout.  Run the diff first and clean up
    # afterwards, also when the diff itself blows up.
    try:
        diff = read_pipe (diff_cmd, ignore_errors=True)
    finally:
        system ('rm -rf %(gd)s' % locals ())

    if diff:
        if len (diff) > 1024:
            # keep head and tail only; diff[512:] kept everything.
            diff = diff[:512] + '\n...\n' + diff[-512:]

        progress ("Conversion introduced changes: %s" % diff)
    else:
        progress ("Checkout matches pristine darcs tree.")
def _common_parent_candidates ():
    """Return the common ancestors of all pending patches, newest first.

    Each ancestor patch is listed once, and pairs of pending patches
    without a common ancestor are skipped.
    """
    by_number = {}
    pending = list (pending_patches.values ())
    for v in pending:
        for w in pending:
            ancestor = common_ancestor (git_commits[v.number],
                                        git_commits[w.number])
            if ancestor is not None:
                by_number[ancestor.darcs_patch.number] = ancestor.darcs_patch

    numbers = sorted (by_number)
    numbers.reverse ()
    return [by_number[n] for n in numbers]

def main ():
    """Convert the darcs repository named on the command line to git.

    Creates a bare git repository (replacing an existing one at the
    target path), replays the darcs patches one at a time in a scratch
    darcs repository and feeds the resulting trees to git fast-import.
    For every patch a parent is searched under which it pulls without
    conflict; patches without such a parent are reported and skipped.
    """
    (options, args) = get_cli_options ()

    darcs_repo = os.path.abspath (args[0])
    git_repo = os.path.abspath (options.target_git_repo)

    if os.path.exists (git_repo):
        system ('rm -rf %(git_repo)s' % locals ())

    system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals ())
    system ('git --git-dir %(git_repo)s repo-config core.logAllRefUpdates false' % locals ())

    os.environ['GIT_DIR'] = git_repo

    # `git fast-import' rather than `git-fast-import': newer git
    # installations do not put the dashed commands on PATH.
    gfi = os.popen ('git fast-import --quiet', 'w')

    patches = get_darcs_patches (darcs_repo)
    conv_repo = DarcsConversionRepo (options.basename + ".tmpdarcs", patches)
    conv_repo.start_at (-1)

    for p in patches:

        parent_patch = None
        parent_number = -1

        # first try to attach to a common ancestor of the pending
        # patches.  The candidates are unique, so a parent that
        # conflicts is not tried over and over again.
        for q in _common_parent_candidates ():
            try:
                conv_repo.go_from_to (q, p)
            except PullConflict:
                continue

            parent_patch = q
            parent_number = q.number
            progress ('Found existing common parent as predecessor')
            break

        ## no branches found where we could attach.
        ## try previous commits one by one.
        if not parent_patch:
            parent_number = p.number - 2
            while 1:
                if parent_number >= 0:
                    parent_patch = patches[parent_number]

                try:
                    conv_repo.go_from_to (parent_patch, p)
                    break
                except PullConflict:

                    ## simplistic, may not be enough.
                    progress ('conflict, going one back')
                    parent_number -= 1

                    if parent_number < 0:
                        break

                    if (options.history_window
                        and parent_number < p.number - options.history_window):

                        # outside the window: give up on this patch.
                        parent_number = -2
                        break

        if parent_number >= 0 or p.number == 0:
            progress ('Export %d -> %d (total %d)' % (parent_number,
                                                      p.number, len (patches)))
            export_commit (conv_repo, p, parent_patch, gfi)
            if p.tag_name ():
                export_tag (p, gfi)

            if options.checkpoint_frequency and p.number % options.checkpoint_frequency == 0:
                export_checkpoint (gfi)
        else:
            progress ("Can't import patch %d, need conflict resolution patch?" % p.number)

    export_pending (gfi)

    # close () returns the exit status of the importer; don't let a
    # failed import go unnoticed.
    if gfi.close ():
        progress ("git fast-import reported an error, the result may be incomplete")

    # the per-patch helper branches are not part of the result.
    for f in glob.glob ('%(git_repo)s/refs/heads/darcstmp*' % locals ()):
        os.unlink (f)

    test_conversion (darcs_repo, git_repo)

    if not options.debug:
        conv_repo.clean ()
# Only convert when run as a script, so that the module can be
# imported without side effects.
if __name__ == '__main__':
    main ()