minor fixes.
[darcs2git.git] / darcs2git.py
blobd3f8ff324d0af31fd590854ea06f71256b0307ec
1 #! /usr/bin/python
3 """
5 darcs2git -- Darcs to git converter.
7 Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 """
26 # TODO:
28 # - time zones
30 # - file modes
32 # - use binary search to find from-patch in case of conflict.
import calendar
import gdbm as dbmodule
import glob
import gzip
import optparse
import os
import re
import sys
import time
import xml.dom.minidom
45 ################################################################
46 # globals
# Open file that system() appends every shell command to.  Stays None
# unless get_cli_options() opens it because --debug was given.
log_file = None
# Parsed command-line options (optparse values); set by get_cli_options().
options = None
# EMAIL -> NAME, filled from the file passed with -a/--authors.
mail_to_name_dict = {}
# patch number -> DarcsPatch for exported commits that no later commit
# has taken as a parent yet.  export_commit() maintains it and
# export_pending() turns what is left into branch heads.
pending_patches = {}
# patch number -> GitCommit for every commit written by export_commit().
git_commits = {}
# Not referenced in the visible part of this file.
used_tags = {}
56 ################################################################
57 # utils
class PullConflict(Exception):
    """Pulling a patch into the conversion repo failed or left conflicts.

    Raised by DarcsConversionRepo.go_from_to().
    """
class CommandFailed(Exception):
    """A shell command run through system() or read_pipe() exited non-zero."""
def progress(s):
    """Write the status message `s` to stderr, followed by a newline."""
    line = s + '\n'
    sys.stderr.write(line)
def get_cli_options():
    """Parse sys.argv and initialise the module-level option state.

    Side effects:
      * the global `options` is set to the parsed optparse values, with
        two extra attributes: `basename` (last component of the darcs
        repo path with '.darcs' removed) and a defaulted
        `target_git_repo`;
      * `mail_to_name_dict` is filled when -a/--authors is given;
      * with --debug, the global `log_file` is opened for writing.

    Prints the help text and exits with status 2 when no DARCS-REPO
    argument is given.

    Returns the tuple (options, args).
    """
    global options
    global log_file

    class MyOP(optparse.OptionParser):
        """Option parser that appends description, bugs and license to --help."""

        def print_help(self):
            optparse.OptionParser.print_help(self)
            # The trailing '\n' reproduces what the `print` statement added.
            sys.stdout.write('''
DESCRIPTION

This tool is a conversion utility for Darcs repositories, importing
them in chronological order. It requires a Git version that has
git-fast-import. It does not support incremental updating.

BUGS

* repositories with skewed timestamps, or different patches with
equal timestamps will confuse darcs2git.
* does not respect file modes or time zones.
* too slow. See source code for instructions to speed it up.
* probably doesn\'t work on partial repositories

Report new bugs to hanwen@xs4all.nl

LICENSE

Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>.
Distributed under terms of the GNU General Public License
This program comes with NO WARRANTY.
''' + '\n')

    p = MyOP()

    p.usage = '''darcs2git [OPTIONS] DARCS-REPO'''
    p.description = '''Convert darcs repo to git.'''

    def update_map(option, opt, value, parser):
        """optparse callback: load EMAIL=NAME lines from the file `value`."""
        author_file = open(value)
        try:
            for line in author_file:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no mapping.
                    continue
                if '=' not in line:
                    # optparse reports OptionValueError as a usage error.
                    raise optparse.OptionValueError(
                        '%s: expected EMAIL=NAME, got %r' % (value, line))
                # Split once only: the NAME part may itself contain '='.
                (mail, name) = line.split('=', 1)
                mail_to_name_dict[mail.strip()] = name.strip()
        finally:
            author_file.close()

    p.add_option('-a', '--authors', action='callback',
                 callback=update_map,
                 type='string',
                 nargs=1,
                 help='read a text file, containing EMAIL=NAME lines')

    p.add_option('--checkpoint-frequency', action='store',
                 dest='checkpoint_frequency',
                 type='int',
                 default=0,
                 help='how often should the git importer be synced?\n'
                 'Default is 0 (no limit)')

    p.add_option('-d', '--destination', action='store',
                 type='string',
                 default='',
                 dest='target_git_repo',
                 help='where to put the resulting Git repo.')

    p.add_option('--verbose', action='store_true',
                 dest='verbose',
                 default=False,
                 help='show commands as they are invoked')

    p.add_option('--history-window', action='store',
                 dest='history_window',
                 type='int',
                 default=0,
                 help='Look back this many patches as conflict ancestors.\n'
                 'Default is 0 (no limit)')

    p.add_option('--debug', action='store_true',
                 dest='debug',
                 default=False,
                 help="""add patch numbers to commit messages;
don\'t clean conversion repo;
test end result.""")

    options, args = p.parse_args()
    if not args:
        p.print_help()
        sys.exit(2)

    options.basename = os.path.basename(
        os.path.normpath(args[0])).replace('.darcs', '')
    if not options.target_git_repo:
        options.target_git_repo = options.basename + '.git'

    if options.debug:
        name = options.target_git_repo.replace('.git', '.log')
        if name == options.target_git_repo:
            # Target had no '.git' to swap out; do not clobber the repo path.
            name += '.log'

        progress("Shell log to %s" % name)
        log_file = open(name, 'w')

    return (options, args)
def read_pipe(cmd, ignore_errors=False):
    """Run the shell command `cmd` and return everything it wrote to stdout.

    The command is echoed through progress() when --verbose is set.
    Raises CommandFailed if the command exits with a non-zero status,
    unless `ignore_errors` is true.
    """
    if options.verbose:
        progress('pipe %s' % cmd)

    stream = os.popen(cmd)
    output = stream.read()

    # close() returns None on success and the exit status otherwise.
    exit_status = stream.close()
    if ignore_errors or not exit_status:
        return output

    raise CommandFailed("Pipe failed: %s" % cmd)
def system(c, ignore_error=0, timed=0):
    """Run the shell command `c` through os.system().

    With `timed`, the command is prefixed with "time ".  The command is
    echoed through progress() when --verbose is set and appended to the
    shell log when one is open.  Raises CommandFailed on a non-zero exit
    status unless `ignore_error` is true.
    """
    command = c
    if timed:
        command = "time " + command

    if options.verbose:
        progress(command)

    if log_file:
        log_file.write('%s\n' % command)
        log_file.flush()

    status = os.system(command)
    if status and not ignore_error:
        raise CommandFailed("Command failed: %s" % command)
def darcs_date_to_git(x):
    """Convert a darcs date 'YYYYMMDDHHMMSS' to seconds since the epoch.

    The value is used as the timestamp of git-fast-import committer and
    tagger lines, which expect seconds since the epoch in UTC.  darcs
    reports the `date` attribute in UTC, so the parsed time is converted
    with calendar.timegm(); time.mktime() would treat it as local time
    and shift every commit by the local UTC offset of the converting
    machine.

    Returns the timestamp as a decimal string.  Raises ValueError if `x`
    does not match the expected format.
    """
    t = time.strptime(x, '%Y%m%d%H%M%S')
    return '%d' % calendar.timegm(t)
def darcs_timezone(x):
    """Return the git timezone offset for the darcs local date string `x`.

    `x` is parsed (so a malformed date raises ValueError), but the parsed
    value is not used yet: the offset returned is always "+0100".
    """
    local_date_format = '%a %b %d %H:%M:%S %Z %Y'
    time.strptime(x, local_date_format)

    # todo: derive the real offset from the parsed date.
    return "+0100"
199 ################################################################
200 # darcs
class DarcsConversionRepo:
    """Representation of a Darcs repo.

    The repo is thought to be ordered, and supports methods for
    going back (obliterate) and forward (pull).

    State kept on the instance:
      dir              absolute path of the scratch repository
      patches          all patches of the source repo, indexed by number
      _current_number  number of the patch the repo was last moved to
      _is_valid        false once a darcs command failed inside the repo
      _inventory_dict  cache of short id -> patch for the patches present
                       in the repo, or None when it has to be re-read
      _short_id_dict   short id -> patch for every known patch
    """

    def __init__(self, dir, patches):
        self.dir = os.path.abspath(dir)
        self.patches = patches
        self._current_number = -1
        # -1 is truthy, so a new object passes the `not self._is_valid`
        # checks until a failed pull sets it to False.
        self._is_valid = -1
        self._inventory_dict = None

        self._short_id_dict = dict((p.short_id(), p) for p in patches)

    def __del__(self):
        """Best-effort removal of the scratch repo, unless --debug is set."""
        try:
            keep_repo = options.debug
        except (NameError, AttributeError):
            # Module globals may already be gone (or never set) when the
            # object is finalised; clean() is the reliable cleanup path.
            return

        if not keep_repo and system is not None:
            system('rm -fr %s' % self.dir)

    def is_contiguous(self):
        """Return true if the repo holds exactly patches 0.._current_number."""
        return (len(self.inventory_dict()) == self._current_number + 1
                and self.contains_contiguous(self._current_number))

    def contains_contiguous(self, num):
        """Return True if the repo is usable and has all patches 0..num."""
        if not self._is_valid:
            return False

        darcs_dir = self.dir + '/_darcs'
        if not os.path.exists(darcs_dir):
            return False

        for p in self.patches[:num + 1]:
            if not self.has_patch(p):
                return False

        return True

    def has_patch(self, p):
        """Return true if patch `p` is listed in the repo's inventory."""
        assert self._is_valid

        return p.short_id() in self.inventory_dict()

    def pristine_tree(self):
        """Return the path of the repo's pristine tree."""
        return self.dir + '/_darcs/pristine'

    def go_back_to(self, dest):
        """Obliterate the newest patches so that patch `dest` is the last one."""
        # at 4, len = 5, go to 2: count == 2
        count = len(self.inventory_dict()) - dest - 1

        assert self._is_valid
        assert count > 0

        self.checkout()

        progress('Rewinding %d patches' % count)
        system('cd %(dir)s && echo ay|darcs obliterate --ignore-times --last %(count)d'
               % {'dir': self.dir, 'count': count})

        # Keep the cached inventory in step with what was obliterated.
        d = self.inventory_dict()
        for p in self.patches[dest + 1:self._current_number + 1]:
            d.pop(p.short_id(), None)

        self._current_number = dest

    def clean(self):
        """Remove the scratch repository from disk."""
        system('rm -rf %s' % self.dir)

    def checkout(self):
        """Copy the pristine tree over the working directory with rsync."""
        system('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/'
               % {'dir': self.dir})

    def pull(self, patch):
        """Pull `patch` (matched by hash) from its source repo into this one."""
        progress('Pull patch %d' % patch.number)
        system('cd %(dir)s && darcs pull --ignore-times --quiet --all --match "hash %(id)s" %(source_repo)s '
               % {'dir': self.dir,
                  'id': patch.attributes['hash'],
                  'source_repo': patch.dir})

        self._current_number = patch.number

        ## must reread: the pull may have pulled in others.
        self._inventory_dict = None

    def go_forward_to(self, num):
        """Pull every patch in 0..num that the repo does not have yet.

        The cached inventory is updated up front; _current_number is left
        untouched.
        """
        d = self.inventory_dict()

        pull_me = []

        ## ugh
        for p in self.patches[0:num + 1]:
            if p.short_id() not in d:
                pull_me.append(p)
                d[p.short_id()] = p

        if not pull_me:
            # Nothing missing: do not run darcs with an empty --match.
            return

        pull_str = ' || '.join(['hash %s' % p.id() for p in pull_me])

        progress('Pulling %d patches to go to %d' % (len(pull_me), num))
        system('darcs pull --all --repo %(dir)s --match "%(pull_str)s" %(src)s'
               % {'dir': self.dir,
                  'pull_str': pull_str,
                  'src': self.patches[0].dir})

    def create_fresh(self):
        """Replace the scratch repo with a newly initialised, empty one."""
        system('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
               % {'dir': self.dir})
        self._is_valid = True
        self._current_number = -1
        self._inventory_dict = {}

    def inventory(self):
        """Return the concatenated text of all inventory files of the repo."""
        darcs_dir = self.dir + '/_darcs'
        names = ([darcs_dir + '/inventory']
                 + glob.glob(darcs_dir + '/inventories/*'))

        chunks = []
        for name in names:
            f = open(name)
            try:
                chunks.append(f.read())
            finally:
                f.close()
        return ''.join(chunks)

    def inventory_dict(self):
        """Return the short id -> patch mapping of the patches in the repo.

        The mapping is read from the inventory files on first use and
        cached until pull() invalidates it.  Raises KeyError if the
        inventory lists a patch that is not in `patches`.
        """
        if not isinstance(self._inventory_dict, dict):
            self._inventory_dict = {}

            # Each inventory entry has a line 'AUTHOR*[*-]DATE...'; the
            # captured group is exactly what DarcsPatch.short_id() builds.
            for m in re.finditer(r'\n([^*\n]+\*[*-][0-9]+)',
                                 self.inventory()):
                key = m.group(1)
                self._inventory_dict[key] = self._short_id_dict[key]

        return self._inventory_dict

    def start_at(self, num):
        """Move the repo to NUM.

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'
        """
        progress('Starting afresh at %d' % num)

        self.create_fresh()
        dir = self.dir
        iv = open(dir + '/_darcs/inventory', 'w')
        try:
            if log_file:
                # Newline-terminated so the next logged command does not
                # end up on the comment line.
                log_file.write("# messing with _darcs/inventory\n")

            for p in self.patches[:num + 1]:
                os.link(p.filename(),
                        dir + '/_darcs/patches/' + os.path.basename(p.filename()))
                iv.write(p.header())
                self._inventory_dict[p.short_id()] = p
        finally:
            iv.close()

        system('darcs repair --repo %(dir)s --quiet' % {'dir': dir})
        self.checkout()
        self._current_number = num
        self._is_valid = True

    def go_to(self, dest):
        """Bring the repo to patch `dest` by rewinding, pulling or rebuilding."""
        contiguous = self.is_contiguous()

        if not self._is_valid:
            self.start_at(dest)
        elif dest == self._current_number and contiguous:
            pass
        elif self.contains_contiguous(dest):
            self.go_back_to(dest)
        elif dest - len(self.inventory_dict()) < dest // 100:
            # Only a few patches (under 1% of dest) are missing: pulling
            # them is preferred over rebuilding from scratch.
            self.go_forward_to(dest)
        else:
            self.start_at(dest)

    def go_from_to(self, from_patch, to_patch):
        """Move the repo to FROM_PATCH, then go to TO_PATCH.  Raise
        PullConflict if conflict is detected
        """
        progress('Trying %s -> %s' % (from_patch, to_patch))

        if from_patch:
            self.go_to(from_patch.number)
        else:
            self.create_fresh()

        try:
            self.pull(to_patch)
            success = 'No conflicts to resolve' in read_pipe(
                'cd %(dir)s && echo y|darcs resolve' % {'dir': self.dir})
        except CommandFailed:
            # The repo is in an unknown state; force a rebuild next time.
            self._is_valid = False
            raise PullConflict()

        if not success:
            raise PullConflict()
class DarcsPatch:
    """One patch of a darcs repository, built from a <patch> XML element.

    Attributes set by the constructor:
      xml          the <patch> DOM element
      dir          path of the repository the patch lives in
      number       position in the patch list (-1 until assigned)
      attributes   the XML attributes of the element, as a dict
      author_name, author_mail, message, date
                   values used for the git commit
    """

    def __repr__(self):
        return 'patch %d' % self.number

    def __init__(self, xml, dir):
        self.xml = xml
        self.dir = dir
        self.number = -1
        self.attributes = {}
        self._contents = None
        for (nm, value) in xml.attributes.items():
            self.attributes[nm] = value

        # fixme: ugh attributes vs. methods.
        self.extract_author()
        self.extract_message()
        self.extract_time()

    def id(self):
        """Return the full darcs hash of the patch."""
        return self.attributes['hash']

    def short_id(self):
        """Return 'AUTHOR*[*-]DATE', the form the patch has in an inventory."""
        inv = '*'
        if self.attributes['inverted'] == 'True':
            inv = '-'

        return '%s*%s%s' % (self.attributes['author'], inv,
                            self.attributes['hash'].split('-')[0])

    def filename(self):
        """Return the path of the gzipped patch file in the source repo."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents(self):
        """Return the decompressed patch file, read once and then cached."""
        if self._contents is None:
            f = gzip.open(self.filename())
            try:
                self._contents = f.read()
            finally:
                f.close()

        return self._contents

    def header(self):
        """Return the inventory entry for this patch, ending in ']' + newline.

        The entry is the patch name line, the author/date line and, when
        the author line does not already close the header, the long
        comment up to the closing ']'.
        """
        lines = self.contents().split('\n')

        name = lines[0]
        committer = lines[1] + '\n'
        committer = re.sub('] {\n$', ']\n', committer)
        committer = re.sub('] *\n$', ']\n', committer)
        comment = ''
        if not committer.endswith(']\n'):
            for l in lines[2:]:
                # startswith() rather than l[0]: an empty line inside the
                # comment must not raise IndexError.
                if l.startswith(']'):
                    comment += ']\n'
                    break
                comment += l + '\n'

        header = name + '\n' + committer
        if comment:
            header += comment

        assert header[-1] == '\n'
        return header

    def extract_author(self):
        """Set author_name and author_mail from the 'author' attribute.

        'Name <mail>' is split into its parts.  A bare address gets its
        name from mail_to_name_dict, falling back to the part before '@'.
        """
        mail = self.attributes['author']
        name = ''
        m = re.search("^(.*) <(.*)>$", mail)

        if m:
            name = m.group(1)
            mail = m.group(2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split('@')[0]

        self.author_name = name
        self.author_mail = mail

    def extract_time(self):
        """Set `date` to '<epoch seconds> <timezone>' for git-fast-import."""
        self.date = (darcs_date_to_git(self.attributes['date'])
                     + ' '
                     + darcs_timezone(self.attributes['local_date']))

    def name(self):
        """Return the text of the <name> element, or '(no comment)'."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            # No <name> element, or an empty one.
            pass
        return patch_name

    def extract_message(self):
        """Set `message` to the patch name, a blank line and the long comment."""
        patch_name = self.name()
        comment = ''
        comment_elts = self.xml.getElementsByTagName('comment')
        # An empty <comment/> has no child nodes; treat it as no comment.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        if self.attributes['inverted'] == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = '%s\n\n%s' % (patch_name, comment)

    def tag_name(self):
        """Return the git tag name for a 'TAG ...' patch, or '' otherwise.

        Whitespace and ':' in the darcs tag are replaced by '_'.
        """
        patch_name = self.name()
        if patch_name.startswith("TAG "):
            tag = patch_name[4:]
            return re.sub(r'[\s:]', '_', tag).strip()
        return ''
def get_darcs_patches(darcs_repo):
    """Return the patches of `darcs_repo` as a list of DarcsPatch objects.

    The list follows the order of `darcs changes --xml --reverse`, and
    each patch's `number` is its index in that list.
    """
    progress('reading patches.')

    changes_xml = read_pipe('darcs changes --xml --reverse --repo ' + darcs_repo)
    document = xml.dom.minidom.parseString(changes_xml)
    patch_elements = document.documentElement.getElementsByTagName('patch')

    patches = []
    for (index, element) in enumerate(patch_elements):
        patch = DarcsPatch(element, darcs_repo)
        patch.number = index
        patches.append(patch)

    return patches
527 ################################################################
528 # GIT export
class GitCommit:
    """Node of the exported commit graph.

    Links a darcs patch to the commit it was attached to (`parent`) and
    records `depth`, the number of parent links up to a root commit.
    """

    def __init__(self, parent, darcs_patch):
        self.parent = parent
        self.darcs_patch = darcs_patch
        # A commit without parent is a root at depth 0.
        self.depth = 0
        if parent:
            self.depth = parent.depth + 1

    def number(self):
        """Return the number of the darcs patch behind this commit."""
        return self.darcs_patch.number

    def parent_patch(self):
        """Return the darcs patch of the parent commit, or None for a root."""
        if not self.parent:
            return None
        return self.parent.darcs_patch
def common_ancestor(a, b):
    """Return the deepest commit that is an ancestor of both `a` and `b`.

    A commit counts as its own ancestor.  Returns None when the two
    parent chains never meet.
    """
    # Walk the deeper chain up until both commits sit at the same depth.
    while a.depth != b.depth:
        if a.depth < b.depth:
            b = b.parent
        else:
            a = a.parent

    # Climb in lockstep until the chains meet or run out.
    while a and b:
        if a == b:
            return a
        a, b = a.parent, b.parent

    return None
def export_checkpoint(gfi):
    """Write a git-fast-import 'checkpoint' command to the stream `gfi`."""
    command = 'checkpoint\n\n'
    gfi.write(command)
def export_tree(tree, gfi):
    """Write the files below directory `tree` as the content of a commit.

    Emits 'deleteall', then one inline 'M 644' entry per file with its
    path relative to `tree`, then the blank line that ends the commit.
    Every file is exported with mode 644.
    """
    tree = os.path.normpath(tree)
    prefix = tree + '/'
    gfi.write('deleteall\n')
    for (root, dirs, files) in os.walk(tree):
        for f in files:
            rf = os.path.normpath(os.path.join(root, f))
            source = open(rf)
            try:
                s = source.read()
            finally:
                source.close()

            # Strip the leading tree prefix only.  str.replace() would
            # also remove later occurrences and corrupt paths such as
            # 'a/b/a/c' below tree 'a'.
            if rf.startswith(prefix):
                rf = rf[len(prefix):]

            gfi.write('M 644 inline %s\n' % rf)
            gfi.write('data %d\n%s\n' % (len(s), s))
    gfi.write('\n')
def export_commit(repo, patch, last_patch, gfi):
    """Write the commit for `patch` to the git-fast-import stream `gfi`.

    The commit goes to the temporary branch refs/heads/darcstmp<number>
    with mark <number + 1>.  Its parents are the pending patches that
    `repo` already contains (these stop being pending); if there are
    none, `last_patch` is the parent provided it was exported.  The tree
    is taken from the pristine tree of `repo`.  Afterwards `patch` is
    pending itself and has a GitCommit entry in git_commits.
    """
    head = ('commit refs/heads/darcstmp%d\n' % patch.number,
            'mark :%d\n' % (patch.number + 1),
            'committer %s <%s> %s\n' % (patch.author_name,
                                        patch.author_mail,
                                        patch.date))
    for line in head:
        gfi.write(line)

    msg = patch.message
    if options.debug:
        msg += '\n\n#%d\n' % patch.number

    gfi.write('data %d\n%s\n' % (len(msg), msg))

    # Iterate over a snapshot: entries are deleted while looping.
    mergers = []
    for (number, pending) in list(pending_patches.items()):
        if repo.has_patch(pending):
            mergers.append(number)
            del pending_patches[number]

    if last_patch and not mergers and last_patch.number in git_commits:
        mergers = [last_patch.number]

    if mergers:
        gfi.write('from :%d\n' % (mergers[0] + 1))
        for extra in mergers[1:]:
            gfi.write('merge :%d\n' % (extra + 1))

    pending_patches[patch.number] = patch
    export_tree(repo.pristine_tree(), gfi)

    parent_number = -1
    if last_patch:
        parent_number = last_patch.number
    git_commits[patch.number] = GitCommit(
        git_commits.get(parent_number, None), patch)
def export_pending(gfi):
    """Create the final branches from the patches still pending.

    With one pending patch, refs/heads/master is pointed at its commit.
    With several, each gets a branch refs/heads/master<number> and an
    extra 'tie together' commit on refs/heads/master merges them all.
    With none (nothing was exported), nothing is written.
    """
    heads = list(pending_patches.items())
    if not heads:
        # Empty repository: there is no commit to point master at.
        return

    if len(heads) == 1:
        only_patch = heads[0][1]
        gfi.write('reset refs/heads/master\n')
        gfi.write('from :%d\n\n' % (only_patch.number + 1))

        progress("Creating branch master")
        return

    for (n, p) in heads:
        gfi.write('reset refs/heads/master%d\n' % n)
        gfi.write('from :%d\n\n' % (n + 1))

        progress("Creating branch master%d" % n)

    patches = [p for (n, p) in heads]
    # The merge commit borrows author and date from the first head.
    patch = patches[0]
    gfi.write('commit refs/heads/master\n')
    gfi.write('committer %s <%s> %s\n' % (patch.author_name,
                                          patch.author_mail,
                                          patch.date))
    msg = 'tie together'
    gfi.write('data %d\n%s\n' % (len(msg), msg))
    gfi.write('from :%d\n' % (patch.number + 1))
    for p in patches[1:]:
        gfi.write('merge :%d\n' % (p.number + 1))
    gfi.write('\n')
def export_tag(patch, gfi):
    """Write an annotated tag for the commit of `patch` to the stream `gfi`.

    The tag is named patch.tag_name(), points at mark <number + 1> and
    carries the patch message.
    """
    message = patch.message
    tagger = (patch.author_name, patch.author_mail, patch.date)

    gfi.write('tag %s\n' % patch.tag_name())
    gfi.write('from :%d\n' % (patch.number + 1))
    gfi.write('tagger %s <%s> %s\n' % tagger)
    gfi.write('data %d\n%s\n' % (len(message), message))
659 ################################################################
660 # main.
def test_conversion(darcs_repo, git_repo):
    """Compare a checkout of `git_repo` with the pristine tree of `darcs_repo`.

    The git repository is cloned into a temporary directory, diffed
    against <darcs_repo>/_darcs/pristine, and the outcome is reported
    through progress().  Nothing is compared if the darcs repository has
    no pristine tree.
    """
    pristine = '%(darcs_repo)s/_darcs/pristine' % locals()
    if not os.path.exists(pristine):
        progress("darcs repository does not contain pristine tree?!")
        return

    gd = options.basename + '.checkouttmp.git'
    system('rm -rf %(gd)s && git clone %(git_repo)s %(gd)s' % locals())
    diff_cmd = 'diff --exclude .git -urN %(gd)s %(pristine)s' % locals()
    try:
        # diff exits non-zero when the trees differ; that is a result,
        # not a failure.
        diff = read_pipe(diff_cmd, ignore_errors=True)
    finally:
        # Remove the clone only after it has been compared.
        system('rm -rf %(gd)s' % locals())

    if diff:
        if len(diff) > 1024:
            # Keep just the first and the last 512 characters.
            diff = diff[:512] + '\n...\n' + diff[-512:]

        progress("Conversion introduced changes: %s" % diff)
    else:
        progress("Checkout matches pristine darcs tree.")
def main ():
    """Convert the darcs repository named on the command line to git.

    Creates a bare git repository, starts git-fast-import on it, and
    exports the darcs patches one by one.  For every patch a parent is
    searched by trying to pull the patch on top of earlier states of a
    scratch darcs repository; a patch for which no conflict-free parent
    is found is skipped with a message.  Finally the remaining heads are
    turned into branches and the result is compared with the darcs
    pristine tree.
    """
    # This local `options` is the same object get_cli_options() stored in
    # the module global.
    (options, args) = get_cli_options ()

    darcs_repo = os.path.abspath (args[0])
    git_repo = os.path.abspath (options.target_git_repo)

    # Any existing target is replaced.
    if os.path.exists (git_repo):
        system ('rm -rf %(git_repo)s' % locals ())

    system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals ())
    system ('git --git-dir %(git_repo)s repo-config core.logAllRefUpdates false' % locals ())

    # Set before git-fast-import is started below; presumably this is how
    # the importer finds the new repository -- confirm.
    os.environ['GIT_DIR'] = git_repo

    gfi = os.popen ('git-fast-import --quiet', 'w')

    patches = get_darcs_patches (darcs_repo)
    conv_repo = DarcsConversionRepo (options.basename + ".tmpdarcs", patches)
    # -1: start with an empty scratch repository.
    conv_repo.start_at (-1)

    for p in patches:

        parent_patch = None
        parent_number = -1

        # First try the common ancestors of every pair of pending heads
        # (a head paired with itself yields the head), highest patch
        # number first.
        combinations = [(v, w) for v in pending_patches.values ()
                        for w in pending_patches.values ()]
        candidates = [common_ancestor (git_commits[c[0].number], git_commits[c[1].number]) for c in combinations]
        # NOTE(review): common_ancestor() returns None for commits without
        # a shared ancestor, which would raise AttributeError here.
        candidates = sorted ([(-a.darcs_patch.number, a) for a in candidates])
        for (depth, c) in candidates:
            q = c.darcs_patch
            try:
                conv_repo.go_from_to (q, p)

                parent_patch = q
                parent_number = q.number
                progress ('Found existing common parent as predecessor')
                break

            except PullConflict:
                pass

        ## no branches found where we could attach.
        ## try previous commits one by one.
        if not parent_patch:
            parent_number = p.number - 2
            while 1:
                # A negative number leaves parent_patch as it is (None on
                # the first pass), so the pull starts from a fresh repo.
                if parent_number >= 0:
                    parent_patch = patches[parent_number]

                try:
                    conv_repo.go_from_to (parent_patch, p)
                    break
                except PullConflict:

                    ## simplistic, may not be enough.
                    progress ('conflict, going one back')
                    parent_number -= 1

                    if parent_number < 0:
                        break

                    if (options.history_window
                        and parent_number < p.number - options.history_window):

                        # Outside the allowed window: -2 makes the export
                        # test below fail, so the patch is skipped.
                        parent_number = -2
                        break

        if parent_number >= 0 or p.number == 0:
            progress ('Export %d -> %d (total %d)' % (parent_number,
                                                      p.number, len (patches)))
            export_commit (conv_repo, p, parent_patch, gfi)
            if p.tag_name ():
                export_tag (p, gfi)

            if options.checkpoint_frequency and p.number % options.checkpoint_frequency == 0:
                export_checkpoint (gfi)
        else:
            progress ("Can't import patch %d, need conflict resolution patch?" % p.number)

    export_pending (gfi)
    gfi.close ()

    # Remove the per-commit darcstmp<N> branches created by export_commit().
    for f in glob.glob ('%(git_repo)s/refs/heads/darcstmp*' % locals ()):
        os.unlink (f)

    test_conversion (darcs_repo, git_repo)

    if not options.debug:
        conv_repo.clean ()
# Run the conversion only when executed as a script, so that importing
# the module does not parse sys.argv or touch any repository.
if __name__ == '__main__':
    main()