Fixed a time conversion issue. git-fast-import was receiving a localtime and timezone...
[darcs2git.git] / darcs2git.py
blob9a8f62d98cb084d5a5e7a21bdfc6bc8f96efa202
1 #!/usr/bin/env python
# Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
#
# Distributed under terms of the GNU General Public License
# This program comes with NO WARRANTY.
#

# TODO:
#
# - time zones
#
# - file modes
#
# - use binary search to find the from-patch in case of a conflict.
#
20 import urlparse
21 import distutils.version
22 import glob
23 import os
24 import sys
25 import time
26 import xml.dom.minidom
27 import re
28 import gdbm as dbmodule
29 import gzip
30 import optparse
31 from email.utils import parsedate_tz
32 from calendar import timegm
34 ################################################################
35 # globals
# Shell log used with --debug; get_cli_options () opens it, otherwise
# it stays None and system () skips logging.
log_file = None

# Parsed command line options; assigned by get_cli_options ().
options = None

# EMAIL -> NAME, filled from the file passed with --authors.
mail_to_name_dict = {}

# Patch number -> DarcsPatch for exported patches that no later export
# has merged yet.  export_pending () turns what is left into branches.
pending_patches = {}

# Patch number -> GitCommit for every patch exported so far.
git_commits = {}

# NOTE(review): not referenced in the visible part of this file.
used_tags = {}
45 ################################################################
46 # utils
class PullConflict (Exception):
    """Raised by DarcsConversionRepo.go_from_to () when pulling a
    patch fails or leaves conflicts behind."""
    pass
class CommandFailed (Exception):
    """Raised by read_pipe () and system () when a shell command
    reports a non-zero exit status."""
    pass
def progress (s):
    """Write the status message S, followed by a newline, to stderr."""
    sys.stderr.write (s + '\n')
def get_cli_options ():
    """Parse the command line and set up the module-wide state.

    Side effects: stores the parsed options in the global `options`,
    fills `mail_to_name_dict` when --authors is given, and opens the
    global `log_file` when --debug is given.

    Returns the tuple (options, args).  Prints the help text and exits
    with status 2 when no repository argument is given.

    Raises NotImplementedError for a remote (URL) repository and
    RuntimeError when the installed git is older than 1.5.0.
    """
    class MyOP (optparse.OptionParser):
        def print_help (self, file=None):
            optparse.OptionParser.print_help (self, file)
            if file is None:
                file = sys.stdout
            file.write ('''
DESCRIPTION

This tool is a conversion utility for Darcs repositories, importing
them in chronological order. It requires a Git version that has
git-fast-import. It does not support incremental updating.

BUGS

 * repositories with skewed timestamps, or different patches with
   equal timestamps will confuse darcs2git.
 * does not respect file modes or time zones.
 * too slow. See source code for instructions to speed it up.
 * probably doesn\'t work on partial repositories

Report new bugs to hanwen@xs4all.nl

LICENSE

Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>.
Distributed under terms of the GNU General Public License
This program comes with NO WARRANTY.
''' + '\n')

    p = MyOP ()

    p.usage = '''darcs2git [OPTIONS] DARCS-REPO'''
    p.description = '''Convert darcs repo to git.'''

    def update_map (option, opt, value, parser):
        # optparse callback for --authors: merge the EMAIL=NAME lines
        # of the file VALUE into mail_to_name_dict.
        f = open (value)
        try:
            for line in f:
                line = line.strip ()
                if not line:
                    continue
                if '=' not in line:
                    raise optparse.OptionValueError (
                        '%s: expected EMAIL=NAME, got %r' % (opt, line))
                # Split on the first '=' only, so names may contain '='.
                (mail, name) = line.split ('=', 1)
                mail_to_name_dict[mail.strip ()] = name.strip ()
        finally:
            f.close ()

    p.add_option ('-a', '--authors', action='callback',
                  callback=update_map,
                  type='string',
                  nargs=1,
                  help='read a text file, containing EMAIL=NAME lines')

    p.add_option ('--checkpoint-frequency', action='store',
                  dest='checkpoint_frequency',
                  type='int',
                  default=0,
                  help='how often should the git importer be synced?\n'
                  'Default is 0 (no limit)'
                  )

    p.add_option ('-d', '--destination', action='store',
                  type='string',
                  default='',
                  dest='target_git_repo',
                  help='where to put the resulting Git repo.')

    p.add_option ('--verbose', action='store_true',
                  dest='verbose',
                  default=False,
                  help='show commands as they are invoked')

    p.add_option ('--history-window', action='store',
                  dest='history_window',
                  type='int',
                  default=0,
                  help='Look back this many patches as conflict ancestors.\n'
                  'Default is 0 (no limit)'
                  )

    p.add_option ('--debug', action='store_true',
                  dest='debug',
                  default=False,
                  help="""add patch numbers to commit messages;
don\'t clean conversion repo;
test end result.""")

    global options
    options, args = p.parse_args ()
    if not args:
        p.print_help ()
        sys.exit (2)

    # urlparse always returns a 6-tuple, so testing its length can
    # never detect a URL.  A scheme together with a network location
    # means the argument names a remote repository.
    parsed = urlparse.urlparse (args[0])
    if parsed[0] and parsed[1]:
        raise NotImplementedError ("We support local DARCS repos only.")

    version_pipe = os.popen ("git --version", "r")
    try:
        version_text = version_pipe.read ()
    finally:
        version_pipe.close ()
    git_version = distutils.version.LooseVersion (version_text.strip ().split (" ")[-1])
    ideal_version = distutils.version.LooseVersion ("1.5.0")
    if git_version < ideal_version:
        raise RuntimeError ("You need git >= 1.5.0 for this.")

    options.basename = os.path.basename (os.path.normpath (args[0])).replace ('.darcs', '')
    if not options.target_git_repo:
        options.target_git_repo = options.basename + '.git'

    if options.debug:
        global log_file
        name = options.target_git_repo.replace ('.git', '.log')
        if name == options.target_git_repo:
            # The target has no '.git' to swap out; do not overwrite it.
            name += '.log'

        progress ("Shell log to %s" % name)
        log_file = open (name, 'w')

    return (options, args)
def read_pipe (cmd, ignore_errors=False):
    """Run the shell command CMD and return everything it wrote to stdout.

    The command is echoed through progress () when --verbose is set.
    Raises CommandFailed when the command's exit status is non-zero,
    unless IGNORE_ERRORS is true.
    """
    if options.verbose:
        progress ('pipe %s' % cmd)
    pipe = os.popen (cmd)

    val = pipe.read ()
    # close () returns None on success and the exit status otherwise.
    if pipe.close () and not ignore_errors:
        raise CommandFailed ("Pipe failed: %s" % cmd)

    return val
def system (c, ignore_error=0, timed=0):
    """Run the shell command C through os.system ().

    With TIMED the command is prefixed with `time'.  The command is
    echoed through progress () when --verbose is set and appended to
    the shell log when one is open.  Raises CommandFailed on a
    non-zero exit status unless IGNORE_ERROR is true.
    """
    if timed:
        c = "time " + c
    if options.verbose:
        progress (c)

    if log_file:
        log_file.write ('%s\n' % c)
        # Flush at once so the log is complete if the command hangs or
        # the program dies.
        log_file.flush ()

    if os.system (c) and not ignore_error:
        raise CommandFailed ("Command failed: %s" % c)
def darcs_date_to_git (x):
    """Convert the timestamp X (format YYYYMMDDHHMMSS) to a string
    holding the seconds since the epoch.

    The fields are read as UTC: timegm () is used, not mktime (), so
    the local time zone of the machine does not influence the result.
    """
    t = time.strptime (x, '%Y%m%d%H%M%S')
    return '%d' % int (timegm (t))
def darcs_timezone (x):
    """Return the UTC offset of the date string X as +HHMM or -HHMM.

    X is parsed with email.utils.parsedate_tz ().  When the string
    cannot be parsed, or names a zone parsedate_tz () has no offset
    for, '+0000' is returned.
    """
    parsed = parsedate_tz (x)
    offset = None
    if parsed:
        offset = parsed[9]
    if offset is None:
        return '+0000'

    # Split the magnitude and add the sign afterwards: floor division
    # and modulo on a negative offset would turn -0330 into -0430.
    sign = '+'
    if offset < 0:
        sign = '-'
    (hours, minutes) = divmod (abs (offset) // 60, 60)
    return '%s%02d%02d' % (sign, hours, minutes)
195 ################################################################
196 # darcs
class DarcsConversionRepo:
    """Representation of a Darcs repo.

    The repo is thought to be ordered, and supports methods for
    going back (obliterate) and forward (pull).
    """

    def __init__ (self, dir, patches):
        """DIR is the scratch repository, PATCHES the ordered list of
        patch objects of the source repository."""
        self.dir = os.path.abspath (dir)
        self.patches = patches
        # Number of the patch the scratch repo was last moved to.
        self._current_number = -1
        # -1 is truthy, so a new object passes the `_is_valid' tests
        # until create_fresh ()/start_at () store True or a failed pull
        # stores False.
        self._is_valid = -1
        # Cache for inventory_dict (); None means "reread from disk".
        self._inventory_dict = None

        self._short_id_dict = dict ((p.short_id (), p) for p in patches)

    def is_contiguous (self):
        """True when the repo holds exactly patches 0 .. current."""
        return (len (self.inventory_dict ()) == self._current_number + 1
                and self.contains_contiguous (self._current_number))

    def contains_contiguous (self, num):
        """True when the repo exists and holds all patches 0 .. NUM."""
        if not self._is_valid:
            return False

        darcs_dir = self.dir + '/_darcs'
        if not os.path.exists (darcs_dir):
            return False

        for p in self.patches[:num + 1]:
            if not self.has_patch (p):
                return False

        return True

    def has_patch (self, p):
        """True when patch P is listed in the inventory."""
        assert self._is_valid

        return p.short_id () in self.inventory_dict ()

    def pristine_tree (self):
        """Return the path of the pristine tree of the repo."""
        return self.dir + '/_darcs/pristine'

    def go_back_to (self, dest):
        """Obliterate the newest patches until patch DEST is the last."""
        # at 4, len = 5, go to 2: count == 2
        count = len (self.inventory_dict ()) - dest - 1

        assert self._is_valid
        assert count > 0

        self.checkout ()

        progress ('Rewinding %d patches' % count)
        system ('cd %(dir)s && echo ay|darcs obliterate --ignore-times --last %(count)d'
                % {'dir': self.dir, 'count': count})
        d = self.inventory_dict ()
        for p in self.patches[dest + 1:self._current_number + 1]:
            # Entries may be absent already; that is not an error.
            d.pop (p.short_id (), None)

        self._current_number = dest

    def clean (self):
        """Remove the scratch repository from disk."""
        system ('rm -rf %s' % self.dir)

    def checkout (self):
        """Copy the pristine tree over the working directory."""
        system ('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/' % {'dir': self.dir})

    def pull (self, patch):
        """Pull the single patch PATCH, selected by hash, from its
        source repository."""
        progress ('Pull patch %d' % patch.number)
        system ('cd %(dir)s && darcs pull --ignore-times --quiet --all --match "hash %(id)s" %(source_repo)s '
                % {'dir': self.dir,
                   'id': patch.attributes['hash'],
                   'source_repo': patch.dir})

        self._current_number = patch.number

        ## must reread: the pull may have pulled in others.
        self._inventory_dict = None

    def go_forward_to (self, num):
        """Pull every patch of 0 .. NUM that the repo does not hold yet."""
        d = self.inventory_dict ()

        pull_me = []

        ## ugh
        for p in self.patches[0:num + 1]:
            if p.short_id () not in d:
                pull_me.append (p)
                d[p.short_id ()] = p

        if not pull_me:
            # An empty --match expression is not a valid darcs command.
            return

        pull_str = ' || '.join (['hash %s' % p.id () for p in pull_me])

        progress ('Pulling %d patches to go to %d' % (len (pull_me), num))
        system ('darcs pull --all --repo %(dir)s --match "%(pull_str)s" %(src)s'
                % {'dir': self.dir,
                   'pull_str': pull_str,
                   'src': self.patches[0].dir})

    def create_fresh (self):
        """Replace the scratch repo by a newly initialised, empty one."""
        system ('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
                % {'dir': self.dir})
        self._is_valid = True
        self._current_number = -1
        self._inventory_dict = {}

    def inventory (self):
        """Return the text of _darcs/inventory followed by that of all
        files in _darcs/inventories."""
        darcs_dir = self.dir + '/_darcs'
        chunks = []
        for name in [darcs_dir + '/inventory'] + glob.glob (darcs_dir + '/inventories/*'):
            f = open (name)
            try:
                chunks.append (f.read ())
            finally:
                f.close ()
        return ''.join (chunks)

    def inventory_dict (self):
        """Return the dict short id -> patch of the patches in the
        repo.  The result is cached until something resets it."""
        if self._inventory_dict is None:
            found = {}
            for m in re.finditer (r'\n([^*\n]+\*[*-][0-9]+)', self.inventory ()):
                found[m.group (1)] = self._short_id_dict[m.group (1)]
            self._inventory_dict = found

        return self._inventory_dict

    def start_at (self, num):
        """Move the repo to NUM.

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'
        """
        progress ('Starting afresh at %d' % num)

        self.create_fresh ()
        dir = self.dir
        if log_file:
            # Terminate the line, otherwise the next logged command is
            # glued onto this comment.
            log_file.write ("# messing with _darcs/inventory\n")

        iv = open (dir + '/_darcs/inventory', 'w')
        try:
            for p in self.patches[:num + 1]:
                os.link (p.filename (), dir + '/_darcs/patches/' + os.path.basename (p.filename ()))
                iv.write (p.header ())
                self._inventory_dict[p.short_id ()] = p
        finally:
            iv.close ()

        system ('darcs repair --repo %(dir)s --quiet' % {'dir': dir})
        self.checkout ()
        self._current_number = num
        self._is_valid = True

    def go_to (self, dest):
        """Bring the repo to patch DEST, choosing between rewinding,
        pulling the missing patches and starting afresh."""
        contiguous = self.is_contiguous ()

        if not self._is_valid:
            self.start_at (dest)
        elif dest == self._current_number and contiguous:
            pass
        elif self.contains_contiguous (dest):
            self.go_back_to (dest)
        elif dest - len (self.inventory_dict ()) < dest // 100:
            # Pull only when the gap is below 1% of DEST.
            self.go_forward_to (dest)
        else:
            self.start_at (dest)

    def go_from_to (self, from_patch, to_patch):
        """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise
        PullConflict if conflict is detected
        """
        progress ('Trying %s -> %s' % (from_patch, to_patch))

        if from_patch:
            self.go_to (from_patch.number)
        else:
            self.create_fresh ()

        try:
            self.pull (to_patch)
            success = 'No conflicts to resolve' in read_pipe ('cd %(dir)s && echo y|darcs resolve'
                                                              % {'dir': self.dir})
        except CommandFailed:
            # The repo is in an unknown state now; force a rebuild.
            self._is_valid = False
            raise PullConflict ()

        if not success:
            raise PullConflict ()
class DarcsPatch:
    """One patch of a darcs repository, built from a <patch> element
    of the `darcs changes --xml' output."""

    def __repr__ (self):
        return 'patch %d' % self.number

    def __init__ (self, xml, dir):
        """XML is the <patch> DOM element, DIR the repository that
        holds the patch."""
        self.xml = xml
        self.dir = dir
        # Filled in by whoever orders the patches.
        self.number = -1
        self.attributes = {}
        # Cache for contents (); None means "not read yet".
        self._contents = None
        for (nm, value) in xml.attributes.items ():
            self.attributes[nm] = value

        # fixme: ugh attributes vs. methods.
        self.extract_author ()
        self.extract_message ()
        self.extract_time ()

    def id (self):
        """Return the full hash of the patch."""
        return self.attributes['hash']

    def short_id (self):
        """Return AUTHOR, '*', '*' or '-' (inverted patch), and the
        part of the hash before its first '-'."""
        inv = '*'
        if self.attributes['inverted'] == 'True':
            inv = '-'

        return '%s*%s%s' % (self.attributes['author'], inv, self.attributes['hash'].split ('-')[0])

    def filename (self):
        """Return the path of the patch file in the source repo."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents (self):
        """Return the gunzipped text of the patch file (cached)."""
        if self._contents is None:
            f = gzip.open (self.filename ())
            try:
                self._contents = f.read ()
            finally:
                f.close ()

        return self._contents

    def header (self):
        """Return the header of the patch file: the name line, the
        author line and, when present, the lines up to the closing
        ']', in the form written to an inventory file."""
        lines = self.contents ().split ('\n')

        name = lines[0]
        committer = lines[1] + '\n'
        committer = re.sub ('] {\n$', ']\n', committer)
        committer = re.sub ('] *\n$', ']\n', committer)
        comment = ''
        if not committer.endswith (']\n'):
            for l in lines[2:]:
                # startswith () instead of l[0]: an empty line in the
                # comment must not raise IndexError.
                if l.startswith (']'):
                    comment += ']\n'
                    break
                comment += l + '\n'

        header = name + '\n' + committer
        if comment:
            header += comment

        assert header[-1] == '\n'
        return header

    def extract_author (self):
        """Set author_name and author_mail from the author attribute.

        'NAME <MAIL>' is split; for a bare address the name comes from
        mail_to_name_dict, or else from the part before the '@'."""
        mail = self.attributes['author']
        name = ''
        m = re.search ("^(.*) <(.*)>$", mail)

        if m:
            name = m.group (1)
            mail = m.group (2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split ('@')[0]

        self.author_name = name
        self.author_mail = mail

    def extract_time (self):
        """Set date to '<epoch seconds> <zone offset>', taken from the
        date and local_date attributes."""
        self.date = darcs_date_to_git (self.attributes['date']) + ' ' + darcs_timezone (self.attributes['local_date'])

    def name (self):
        """Return the text of the <name> element, or '(no comment)'
        when the element is missing or empty."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName ('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            pass
        return patch_name

    def extract_message (self):
        """Set message to the patch name, an empty line and the
        comment.  Inverted patches get the prefix 'UNDO: '."""
        patch_name = self.name ()
        comment_elts = self.xml.getElementsByTagName ('comment')
        comment = ''
        # An empty <comment/> has no child node to read.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        if self.attributes['inverted'] == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = '%s\n\n%s' % (patch_name, comment)

    def tag_name (self):
        """Return the tag name for a patch named 'TAG ...', with white
        space and colons replaced by '_'; '' for other patches."""
        patch_name = self.name ()
        if patch_name.startswith ("TAG "):
            # Strip before substituting; afterwards there is no white
            # space left to strip.
            tag = patch_name[4:].strip ()
            return re.sub (r'[\s:]', '_', tag)
        return ''
def get_darcs_patches (darcs_repo):
    """Return the patches of DARCS_REPO as a list of DarcsPatch
    objects, in the order `darcs changes --reverse' reports them.
    Each patch gets its list index as number."""
    progress ('reading patches.')

    xml_string = read_pipe ('darcs changes --xml --reverse --repo ' + darcs_repo)

    dom = xml.dom.minidom.parseString (xml_string)
    xmls = dom.documentElement.getElementsByTagName ('patch')

    patches = [DarcsPatch (x, darcs_repo) for x in xmls]

    for (n, p) in enumerate (patches):
        p.number = n

    return patches
519 ################################################################
520 # GIT export
class GitCommit:
    """Node of the exported commit graph: a darcs patch together with
    the commit it was attached to."""

    def __init__ (self, parent, darcs_patch):
        """PARENT is the parent GitCommit, or None for a root."""
        self.parent = parent
        self.darcs_patch = darcs_patch
        # Distance to the root; common_ancestor () relies on it.
        if parent:
            self.depth = parent.depth + 1
        else:
            self.depth = 0

    def number (self):
        """Return the number of the patch of this commit."""
        return self.darcs_patch.number

    def parent_patch (self):
        """Return the patch of the parent commit, None for a root."""
        if self.parent:
            return self.parent.darcs_patch
        else:
            return None
def common_ancestor (a, b):
    """Return the deepest node that lies on the parent chains of both
    A and B (A or B itself counts), or None when the chains share no
    node.  A and B need the attributes depth and parent."""
    # First bring both nodes to the same depth ...
    while 1:
        if a.depth < b.depth:
            b = b.parent
        elif a.depth > b.depth:
            a = a.parent
        else:
            break

    # ... then climb in lock step until the chains meet or end.
    while a and b:
        if a == b:
            return a

        a = a.parent
        b = b.parent

    return None
def export_checkpoint (gfi):
    """Write a `checkpoint' command to the importer stream GFI."""
    gfi.write ('checkpoint\n\n')
def export_tree (tree, gfi):
    """Write the files below the directory TREE to the importer stream
    GFI: a `deleteall', one inline `M 644' entry per file with its
    path relative to TREE, and a closing empty line."""
    tree = os.path.normpath (tree)
    prefix = tree + '/'
    gfi.write ('deleteall\n')
    for (root, dirs, files) in os.walk (tree):
        for f in files:
            rf = os.path.normpath (os.path.join (root, f))
            fh = open (rf)
            try:
                s = fh.read ()
            finally:
                fh.close ()
            # Cut off the leading tree path only; the same text may
            # occur again further down the path.
            if rf.startswith (prefix):
                rf = rf[len (prefix):]

            gfi.write ('M 644 inline %s\n' % rf)
            gfi.write ('data %d\n%s\n' % (len (s), s))
    gfi.write ('\n')
def export_commit (repo, patch, last_patch, gfi):
    """Write a commit for PATCH to the importer stream GFI.

    The commit goes to the branch darcstmp<number> with the mark
    <number> + 1 and carries the pristine tree of REPO.  Its parents
    are the pending patches that REPO contains; when there are none,
    LAST_PATCH is used if it has been exported.  PATCH becomes pending
    and is recorded in git_commits.
    """
    def utf8 (text):
        # Text that comes from xml.dom.minidom is unicode on Python 2.
        # Encode it before taking its length, so `data <count>' counts
        # the bytes that are actually written.
        if isinstance (text, str):
            return text
        return text.encode ('utf-8')

    gfi.write ('commit refs/heads/darcstmp%d\n' % patch.number)
    gfi.write ('mark :%d\n' % (patch.number + 1))
    gfi.write (utf8 ('committer %s <%s> %s\n' % (patch.author_name,
                                                 patch.author_mail,
                                                 patch.date)))

    msg = patch.message
    if options.debug:
        msg += '\n\n#%d\n' % patch.number
    msg = utf8 (msg)

    gfi.write ('data %d\n%s\n' % (len (msg), msg))

    mergers = []
    # Iterate over a copy: entries are deleted on the way.
    for (n, p) in list (pending_patches.items ()):
        if repo.has_patch (p):
            mergers.append (n)
            del pending_patches[n]

    if (last_patch
        and mergers == []
        and last_patch.number in git_commits):
        mergers = [last_patch.number]

    if mergers:
        gfi.write ('from :%d\n' % (mergers[0] + 1))
        for m in mergers[1:]:
            gfi.write ('merge :%d\n' % (m + 1))

    pending_patches[patch.number] = patch
    export_tree (repo.pristine_tree (), gfi)

    n = -1
    if last_patch:
        n = last_patch.number
    git_commits[patch.number] = GitCommit (git_commits.get (n, None),
                                           patch)
def export_pending (gfi):
    """Create the final branches from the pending patches.

    One pending patch: master is reset to it.  Several: each gets a
    branch master<number>, and master receives a commit `tie together'
    that merges them all.  Nothing is written when no patch is pending.
    """
    pending = list (pending_patches.items ())
    if not pending:
        # Nothing was exported; there is no head to make a branch of.
        return

    if len (pending) == 1:
        gfi.write ('reset refs/heads/master\n')
        gfi.write ('from :%d\n\n' % (pending[0][1].number + 1))

        progress ("Creating branch master")
        return

    for (n, p) in pending:
        gfi.write ('reset refs/heads/master%d\n' % n)
        gfi.write ('from :%d\n\n' % (n + 1))

        progress ("Creating branch master%d" % n)

    patches = [p for (n, p) in pending]
    patch = patches[0]
    committer = 'committer %s <%s> %s\n' % (patch.author_name,
                                            patch.author_mail,
                                            patch.date)
    # Author data from xml.dom.minidom is unicode on Python 2; the
    # importer stream wants bytes.
    if not isinstance (committer, str):
        committer = committer.encode ('utf-8')

    gfi.write ('commit refs/heads/master\n')
    gfi.write (committer)
    msg = 'tie together'
    gfi.write ('data %d\n%s\n' % (len (msg), msg))
    gfi.write ('from :%d\n' % (patch.number + 1))
    for p in patches[1:]:
        gfi.write ('merge :%d\n' % (p.number + 1))
    gfi.write ('\n')
def export_tag (patch, gfi):
    """Write a tag for PATCH to the importer stream GFI.  The tag is
    named patch.tag_name (), points at the mark of PATCH and carries
    the patch message."""
    def utf8 (text):
        # Text that comes from xml.dom.minidom is unicode on Python 2.
        # Encode it before taking its length, so `data <count>' counts
        # the bytes that are actually written.
        if isinstance (text, str):
            return text
        return text.encode ('utf-8')

    msg = utf8 (patch.message)

    gfi.write (utf8 ('tag %s\n' % patch.tag_name ()))
    gfi.write ('from :%d\n' % (patch.number + 1))
    gfi.write (utf8 ('tagger %s <%s> %s\n' % (patch.author_name,
                                              patch.author_mail,
                                              patch.date)))
    gfi.write ('data %d\n%s\n' % (len (msg), msg))
651 ################################################################
652 # main.
def test_conversion (darcs_repo, git_repo):
    """Compare a fresh clone of GIT_REPO with the pristine tree of
    DARCS_REPO.  Report the outcome; exit with status 1 when `diff'
    finds a difference."""
    gd = options.basename + '.checkouttmp.git'
    system ('rm -rf %(gd)s && git clone %(git_repo)s %(gd)s'
            % {'gd': gd, 'git_repo': git_repo})
    # diff exits non-zero when the trees differ; that is a result
    # here, not a failure.
    diff = read_pipe ('diff --exclude .git -urN %(gd)s %(darcs_repo)s/_darcs/pristine'
                      % {'gd': gd, 'darcs_repo': darcs_repo},
                      ignore_errors=True)
    system ('rm -rf %(gd)s' % {'gd': gd})
    if diff:
        progress ("Conversion introduced changes: %s" % diff)
        sys.exit (1)
    else:
        progress ("Checkout matches pristine darcs tree.")
def main ():
    """Convert the darcs repository named on the command line.

    Creates a bare git repository, feeds `git fast-import' one commit
    per patch, creates the final branches, removes the temporary
    darcstmp* refs and checks the result against the pristine tree.
    Raises CommandFailed when a shell command or the importer fails.
    """
    (options, args) = get_cli_options ()

    darcs_repo = os.path.abspath (args[0])
    git_repo = os.path.abspath (options.target_git_repo)

    if os.path.exists (git_repo):
        system ('rm -rf %s' % git_repo)

    system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init'
            % {'git_repo': git_repo})
    # `repo-config' is the deprecated name of `git config'.
    system ('git --git-dir %s config core.logAllRefUpdates false' % git_repo)

    os.environ['GIT_DIR'] = git_repo

    # Go through the `git' wrapper: the dashed git-fast-import is not
    # in PATH with newer git versions.
    gfi = os.popen ('git fast-import --quiet', 'w')

    patches = get_darcs_patches (darcs_repo)
    conv_repo = DarcsConversionRepo (options.basename + ".tmpdarcs", patches)
    conv_repo.start_at (-1)

    for p in patches:

        parent_patch = None
        parent_number = -1

        # Collect the common ancestors of all pairs of pending
        # patches, once each.  Pairs without common ancestor yield
        # None and are left out.
        heads = list (pending_patches.values ())
        ancestors = {}
        for v in heads:
            for w in heads:
                a = common_ancestor (git_commits[v.number], git_commits[w.number])
                if a is not None:
                    ancestors[a.darcs_patch.number] = a

        # Try the newest ancestor first.
        for number in sorted (ancestors, reverse=True):
            q = ancestors[number].darcs_patch
            try:
                conv_repo.go_from_to (q, p)

                parent_patch = q
                parent_number = q.number
                progress ('Found existing common parent as predecessor')
                break

            except PullConflict:
                pass

        ## no branches found where we could attach.
        ## try previous commits one by one.
        if not parent_patch:
            parent_number = p.number - 2
            while 1:
                if parent_number >= 0:
                    parent_patch = patches[parent_number]

                try:
                    conv_repo.go_from_to (parent_patch, p)
                    break
                except PullConflict:
                    ## simplistic, may not be enough.
                    progress ('conflict, going one back')
                    parent_number -= 1

                    if parent_number < 0:
                        break

                    if (options.history_window
                        and parent_number < p.number - options.history_window):

                        # Outside the window: give up on this patch.
                        parent_number = -2
                        break

        if parent_number >= 0 or p.number == 0:
            progress ('Export %d -> %d (total %d)' % (parent_number,
                                                      p.number, len (patches)))
            export_commit (conv_repo, p, parent_patch, gfi)
            if p.tag_name ():
                export_tag (p, gfi)

            if options.checkpoint_frequency and p.number % options.checkpoint_frequency == 0:
                export_checkpoint (gfi)
        else:
            progress ("Can't import patch %d, need conflict resolution patch?" % p.number)

    export_pending (gfi)
    # close () returns the exit status of the importer, None on success.
    if gfi.close ():
        raise CommandFailed ("Command failed: git fast-import --quiet")

    for f in glob.glob ('%s/refs/heads/darcstmp*' % git_repo):
        os.unlink (f)

    test_conversion (darcs_repo, git_repo)

    if not options.debug:
        conv_repo.clean ()
# Run the conversion only when executed as a script, so that importing
# this module has no side effects.
if __name__ == '__main__':
    main ()