Implemented timezone migration
[darcs2git.git] / darcs2git.py
blobbbcb73904bb382f6c0c6758c2a6528fd529b5595
1 #!/usr/bin/env python
3 # Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
5 # Distributed under terms of the GNU General Public License
6 # This program comes with NO WARRANTY.
11 # TODO:
13 # - time zones
15 # - file modes
17 # - use binary search to find the from-patch in case of a conflict.
20 import urlparse
21 import distutils.version
22 import glob
23 import os
24 import sys
25 import time
26 import xml.dom.minidom
27 import re
28 import gdbm as dbmodule
29 import gzip
30 import optparse
31 from email.utils import parsedate_tz
33 ################################################################
34 # globals: mutable module-level state shared by the functions below
37 log_file = None  # shell-command log; opened by get_cli_options() only with --debug, appended to by system()
38 options = None  # optparse values object, assigned by get_cli_options()
39 mail_to_name_dict = {}  # email -> author name; filled by the --authors callback, read by DarcsPatch.extract_author()
40 pending_patches = {}  # patch number -> DarcsPatch; export_commit() adds each exported patch and removes those a later commit contains
41 git_commits = {}  # patch number -> GitCommit; written by export_commit()
42 used_tags = {}  # not referenced anywhere else in the visible source
44 ################################################################
45 # utils
class PullConflict(Exception):
    """Raised by DarcsConversionRepo.go_from_to() when pulling a patch
    fails or leaves conflicts to resolve."""
class CommandFailed(Exception):
    """Raised by read_pipe() and system() when a shell command reports
    a non-zero exit status."""
def progress(s):
    """Write the status message S to stderr, terminated by a newline."""
    line = s + '\n'
    sys.stderr.write(line)
def get_cli_options():
    """Parse the command line, check the environment and set up globals.

    Side effects: assigns the module globals ``options`` and, with
    --debug, ``log_file``; the --authors callback adds entries to
    ``mail_to_name_dict``.

    Returns the (options, args) pair produced by optparse; args[0] is the
    darcs repository to convert.

    Prints the help text and exits with status 2 when no repository is
    given.  Raises NotImplementedError for a remote URL and RuntimeError
    when git cannot be found or is older than 1.5.0.
    """
    global options, log_file

    class MyOP(optparse.OptionParser):
        """OptionParser that appends a longer manual text to --help."""

        def print_help(self, file=None):
            optparse.OptionParser.print_help(self, file)
            if file is None:
                file = sys.stdout
            # The trailing '\n' reproduces what a ``print`` statement adds.
            file.write('''
DESCRIPTION

This tool is a conversion utility for Darcs repositories, importing
them in chronological order. It requires a Git version that has
git-fast-import. It does not support incremental updating.

BUGS

 * repositories with skewed timestamps, or different patches with
   equal timestamps will confuse darcs2git.
 * does not respect file modes or time zones.
 * too slow. See source code for instructions to speed it up.
 * probably doesn\'t work on partial repositories

Report new bugs to hanwen@xs4all.nl

LICENSE

Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>.
Distributed under terms of the GNU General Public License
This program comes with NO WARRANTY.
''' + '\n')

    p = MyOP()

    p.usage = '''darcs2git [OPTIONS] DARCS-REPO'''
    p.description = '''Convert darcs repo to git.'''

    def update_map(option, opt, value, parser):
        """optparse callback for --authors: read EMAIL=NAME lines from VALUE."""
        f = open(value)
        try:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                if '=' not in line:
                    raise optparse.OptionValueError(
                        '%s: expected EMAIL=NAME, got %r' % (value, line))
                # Split on the first '=' only, so names may contain '='.
                (mail, name) = line.split('=', 1)
                mail_to_name_dict[mail.strip()] = name.strip()
        finally:
            f.close()

    p.add_option('-a', '--authors', action='callback',
                 callback=update_map,
                 type='string',
                 nargs=1,
                 help='read a text file, containing EMAIL=NAME lines')

    p.add_option('--checkpoint-frequency', action='store',
                 dest='checkpoint_frequency',
                 type='int',
                 default=0,
                 help='how often should the git importer be synced?\n'
                 'Default is 0 (no limit)'
                 )

    p.add_option('-d', '--destination', action='store',
                 type='string',
                 default='',
                 dest='target_git_repo',
                 help='where to put the resulting Git repo.')

    p.add_option('--verbose', action='store_true',
                 dest='verbose',
                 default=False,
                 help='show commands as they are invoked')

    p.add_option('--history-window', action='store',
                 dest='history_window',
                 type='int',
                 default=0,
                 help='Look back this many patches as conflict ancestors.\n'
                 'Default is 0 (no limit)'
                 )

    p.add_option('--debug', action='store_true',
                 dest='debug',
                 default=False,
                 help="""add patch numbers to commit messages;
don\'t clean conversion repo;
test end result.""")

    options, args = p.parse_args()
    if not args:
        p.print_help()
        sys.exit(2)

    # urlparse() always returns a 6-tuple, so testing its length can never
    # detect anything.  A scheme together with a network location marks a
    # remote URL; a bare scheme could be a Windows drive letter.
    parsed = urlparse.urlparse(args[0])
    if parsed[0] and parsed[1]:
        raise NotImplementedError("We support local DARCS repos only.")

    # Pick the dotted number out of the output instead of the last word:
    # some builds print e.g. "git version 2.39.2 (Apple Git-143)".
    version_output = os.popen("git --version", "r").read()
    m = re.search(r'(\d+(?:\.\d+)+)', version_output)
    if not m:
        raise RuntimeError("Could not determine the git version; is git installed?")
    git_version = tuple([int(part) for part in m.group(1).split('.')])
    git_version += (0,) * (3 - len(git_version))
    if git_version < (1, 5, 0):
        raise RuntimeError("You need git >= 1.5.0 for this.")

    options.basename = os.path.basename(os.path.normpath(args[0])).replace('.darcs', '')
    if not options.target_git_repo:
        options.target_git_repo = options.basename + '.git'

    if options.debug:
        name = options.target_git_repo.replace('.git', '.log')
        if name == options.target_git_repo:
            name += '.log'

        progress("Shell log to %s" % name)
        log_file = open(name, 'w')

    return (options, args)
def read_pipe(cmd, ignore_errors=False):
    """Run CMD through the shell and return everything it wrote to stdout.

    The command is echoed with progress() when options.verbose is set.
    Raises CommandFailed when closing the pipe reports a non-zero status,
    unless IGNORE_ERRORS is true.
    """
    if options.verbose:
        progress('pipe %s' % cmd)

    stream = os.popen(cmd)
    output = stream.read()
    status = stream.close()

    if ignore_errors or not status:
        return output
    raise CommandFailed("Pipe failed: %s" % cmd)
def system(c, ignore_error=0, timed=0):
    """Run the shell command C.

    With TIMED the command is prefixed with "time ".  The final command
    line is echoed with progress() when options.verbose is set and is
    appended to the debug log when one is open.  Raises CommandFailed on
    a non-zero status unless IGNORE_ERROR is true.
    """
    command = c
    if timed:
        command = "time " + command

    if options.verbose:
        progress(command)

    if log_file:
        log_file.write('%s\n' % command)
        log_file.flush()

    status = os.system(command)
    if status and not ignore_error:
        raise CommandFailed("Command failed: %s" % command)
def darcs_date_to_git(x):
    """Convert a darcs 'YYYYMMDDHHMMSS' date into epoch seconds, as a string.

    The timestamp is interpreted as UTC.  time.mktime() would read it in
    the local zone of the machine running the conversion, shifting every
    commit by that machine's UTC offset.

    Raises ValueError if X does not match the expected format.
    """
    # Imported here so the block does not depend on the file-level imports.
    import calendar

    t = time.strptime(x, '%Y%m%d%H%M%S')
    return '%d' % calendar.timegm(t)
def darcs_timezone(x):
    """Return the UTC offset found in the date string X as '+HHMM'/'-HHMM'.

    X is parsed with email.utils.parsedate_tz().  When the string cannot
    be parsed, or carries no offset that parsedate_tz() understands,
    '+0000' is returned instead of failing.
    """
    parsed = parsedate_tz(x)
    offset = None
    if parsed:
        offset = parsed[9]
    if offset is None:
        return '+0000'

    sign = '+'
    if offset < 0:
        sign = '-'
    # Split the magnitude, not the signed value: floor division and modulo
    # on a negative offset would turn -03:30 into -06:30.
    hours, minutes = divmod(abs(int(offset)) // 60, 60)
    return '%s%02d%02d' % (sign, hours, minutes)
194 ################################################################
195 # darcs
class DarcsConversionRepo:
    """Representation of a Darcs repo.

    The repo is thought to be ordered, and supports methods for
    going back (obliterate) and forward (pull).

    ``patches`` is the ordered list of patch objects; each must provide
    short_id(), and the methods that touch darcs also use id(), header(),
    filename(), number, dir and attributes['hash'].
    """

    def __init__(self, dir, patches):
        self.dir = os.path.abspath(dir)
        self.patches = patches
        # Number of the last patch known to be applied; -1 means none.
        self._current_number = -1
        # -1 is truthy, so a new object passes the `_is_valid` checks until
        # go_from_to() sets it to False.
        # NOTE(review): possibly meant as "unknown" - confirm.
        self._is_valid = -1
        # Cache of short id -> patch for the patches in the working repo;
        # None means "re-read from disk" (see inventory_dict).
        self._inventory_dict = None

        self._short_id_dict = dict((p.short_id(), p) for p in patches)

    def is_contiguous(self):
        """True if the inventory holds exactly patches 0.._current_number."""
        return (len(self.inventory_dict()) == self._current_number + 1
                and self.contains_contiguous(self._current_number))

    def contains_contiguous(self, num):
        """True if the repo is usable and contains every patch up to NUM."""
        if not self._is_valid:
            return False

        darcs_dir = self.dir + '/_darcs'
        if not os.path.exists(darcs_dir):
            return False

        for p in self.patches[:num + 1]:
            if not self.has_patch(p):
                return False

        return True

    def has_patch(self, p):
        """True if patch P is listed in the inventory of this repo."""
        assert self._is_valid

        return p.short_id() in self.inventory_dict()

    def pristine_tree(self):
        """Return the path of the pristine tree inside the repo."""
        return self.dir + '/_darcs/pristine'

    def go_back_to(self, dest):
        """Obliterate the newest patches so that patch DEST is the last one."""
        # at 4, len = 5, go to 2: count == 2
        count = len(self.inventory_dict()) - dest - 1

        assert self._is_valid
        assert count > 0

        self.checkout()

        progress('Rewinding %d patches' % count)
        system('cd %s && echo ay|darcs obliterate --ignore-times --last %d'
               % (self.dir, count))
        d = self.inventory_dict()
        for p in self.patches[dest + 1:self._current_number + 1]:
            # Patches that were never in the inventory are simply skipped.
            d.pop(p.short_id(), None)

        self._current_number = dest

    def clean(self):
        """Remove the conversion repo from disk."""
        system('rm -rf %s' % self.dir)

    def checkout(self):
        """Copy the pristine tree over the working directory."""
        system('rsync -a %s/_darcs/pristine/ %s/' % (self.dir, self.dir))

    def pull(self, patch):
        """Pull PATCH from its source repo and make it the current patch."""
        progress('Pull patch %d' % patch.number)
        system('cd %s && darcs pull --ignore-times --quiet --all --match "hash %s" %s '
               % (self.dir, patch.attributes['hash'], patch.dir))

        self._current_number = patch.number

        ## must reread: the pull may have pulled in others.
        self._inventory_dict = None

    def go_forward_to(self, num):
        """Pull every patch up to NUM that the inventory does not list yet."""
        d = self.inventory_dict()

        pull_me = []

        ## ugh
        for p in self.patches[0:num + 1]:
            if p.short_id() not in d:
                pull_me.append(p)
                d[p.short_id()] = p

        progress('Pulling %d patches to go to %d' % (len(pull_me), num))
        if not pull_me:
            # An empty --match pattern is not a usable darcs command line.
            return

        pull_str = ' || '.join(['hash %s' % p.id() for p in pull_me])
        src = self.patches[0].dir
        system('darcs pull --all --repo %s --match "%s" %s'
               % (self.dir, pull_str, src))

    def create_fresh(self):
        """Replace the repo by a newly initialised, empty darcs repo."""
        system('rm -rf %s && mkdir %s && darcs init --repo %s'
               % (self.dir, self.dir, self.dir))
        self._is_valid = True
        self._current_number = -1
        self._inventory_dict = {}

    def inventory(self):
        """Return the text of _darcs/inventory plus all _darcs/inventories/*."""
        darcs_dir = self.dir + '/_darcs'
        chunks = []
        for name in [darcs_dir + '/inventory'] + glob.glob(darcs_dir + '/inventories/*'):
            f = open(name)
            try:
                chunks.append(f.read())
            finally:
                f.close()
        return ''.join(chunks)

    def inventory_dict(self):
        """Return the cached short id -> patch mapping, reading it if needed.

        Raises KeyError if the inventory on disk lists a patch that was not
        passed to the constructor.
        """
        if not isinstance(self._inventory_dict, dict):
            found = {}
            # Collect into a local dict so a KeyError leaves no half-filled
            # cache behind.
            for m in re.finditer(r'\n([^*\n]+\*[*-][0-9]+)', self.inventory()):
                found[m.group(1)] = self._short_id_dict[m.group(1)]
            self._inventory_dict = found
        return self._inventory_dict

    def start_at(self, num):
        """Move the repo to NUM.

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'
        """
        progress('Starting afresh at %d' % num)

        self.create_fresh()
        iv = open(self.dir + '/_darcs/inventory', 'w')
        try:
            if log_file:
                # The newline keeps the next logged command out of this
                # comment line.
                log_file.write("# messing with _darcs/inventory\n")

            for p in self.patches[:num + 1]:
                os.link(p.filename(),
                        self.dir + '/_darcs/patches/' + os.path.basename(p.filename()))
                iv.write(p.header())
                self._inventory_dict[p.short_id()] = p
        finally:
            iv.close()

        system('darcs repair --repo %s --quiet' % self.dir)
        self.checkout()
        self._current_number = num
        self._is_valid = True

    def go_to(self, dest):
        """Bring the repo to patch DEST by the cheapest available route."""
        contiguous = self.is_contiguous()

        if not self._is_valid:
            self.start_at(dest)
        elif dest == self._current_number and contiguous:
            pass
        elif self.contains_contiguous(dest):
            self.go_back_to(dest)
        elif dest - len(self.inventory_dict()) < dest // 100:
            # Only a few patches are missing: pulling beats rebuilding.
            self.go_forward_to(dest)
        else:
            self.start_at(dest)

    def go_from_to(self, from_patch, to_patch):
        """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise
        PullConflict if conflict is detected

        A FROM_PATCH of None starts from a fresh, empty repo.  When the
        darcs commands themselves fail the repo is marked invalid.
        """
        progress('Trying %s -> %s' % (from_patch, to_patch))

        if from_patch:
            self.go_to(from_patch.number)
        else:
            self.create_fresh()

        try:
            self.pull(to_patch)
            output = read_pipe('cd %s && echo y|darcs resolve' % self.dir)
        except CommandFailed:
            self._is_valid = False
            raise PullConflict()

        if 'No conflicts to resolve' not in output:
            raise PullConflict()
class DarcsPatch:
    """One darcs patch, described by a <patch> DOM element.

    XML is the DOM element and DIR the path of the repo holding the patch
    file.  The element's attributes are copied into ``attributes``; the
    code reads the keys 'author', 'hash', 'inverted', 'date' and
    'local_date'.  ``number`` stays -1 until the caller assigns the
    position of the patch.
    """

    def __repr__(self):
        return 'patch %d' % self.number

    def __init__(self, xml, dir):
        self.xml = xml
        self.dir = dir
        self.number = -1
        self.attributes = {}
        # Cached text of the patch file, loaded on demand by contents().
        self._contents = None
        for (nm, value) in xml.attributes.items():
            self.attributes[nm] = value

        # fixme: ugh attributes vs. methods.
        self.extract_author()
        self.extract_message()
        self.extract_time()

    def id(self):
        """Return the full darcs hash of the patch."""
        return self.attributes['hash']

    def short_id(self):
        """Return 'AUTHOR**DATE' ('AUTHOR*-DATE' for an inverted patch).

        DATE is the part of the hash before the first '-'.
        """
        inv = '*'
        if self.attributes['inverted'] == 'True':
            inv = '-'

        return '%s*%s%s' % (self.attributes['author'], inv,
                            self.attributes['hash'].split('-')[0])

    def filename(self):
        """Return the path of the patch file inside the source repo."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents(self):
        """Return the decompressed patch file, reading it at most once."""
        if self._contents is None:
            f = gzip.open(self.filename())
            try:
                self._contents = f.read()
            finally:
                f.close()

        return self._contents

    def header(self):
        """Return the patch header: name line, author line and comment.

        The header ends with the line containing the closing ']'; anything
        after ']' on that line is dropped.
        """
        lines = self.contents().split('\n')

        name = lines[0]
        committer = lines[1] + '\n'
        committer = re.sub('] {\n$', ']\n', committer)
        committer = re.sub('] *\n$', ']\n', committer)
        comment = ''
        if not committer.endswith(']\n'):
            for l in lines[2:]:
                # startswith() instead of l[0]: an empty line inside the
                # comment must not raise IndexError.
                if l.startswith(']'):
                    comment += ']\n'
                    break
                comment += l + '\n'

        header = name + '\n' + committer
        if comment:
            header += comment

        assert header[-1] == '\n'
        return header

    def extract_author(self):
        """Set author_name and author_mail from the 'author' attribute.

        'Name <mail>' is split into its parts.  Otherwise the whole value
        is taken as the mail address and the name comes from
        mail_to_name_dict, falling back to the part before '@'.
        """
        mail = self.attributes['author']
        name = ''
        m = re.search("^(.*) <(.*)>$", mail)

        if m:
            name = m.group(1)
            mail = m.group(2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split('@')[0]

        self.author_name = name
        self.author_mail = mail

    def extract_time(self):
        """Set ``date`` to 'EPOCH-SECONDS OFFSET' for use in git headers."""
        self.date = (darcs_date_to_git(self.attributes['date']) + ' '
                     + darcs_timezone(self.attributes['local_date']))

    def name(self):
        """Return the text of the <name> child, or '(no comment)'."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            # No <name> element, or an empty one.
            pass
        return patch_name

    def extract_message(self):
        """Set ``message`` to the patch name, a blank line and the comment.

        The name of an inverted patch is prefixed with 'UNDO: '.
        """
        patch_name = self.name()
        comment_elts = self.xml.getElementsByTagName('comment')
        comment = ''
        # An empty <comment/> has no child nodes to take the text from.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        if self.attributes['inverted'] == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = '%s\n\n%s' % (patch_name, comment)

    def tag_name(self):
        """Return the tag name if the patch is named 'TAG ...', else ''.

        Whitespace and ':' in the tag are replaced by '_'.
        """
        patch_name = self.name()
        if patch_name.startswith("TAG "):
            tag = patch_name[4:]
            tag = re.sub(r'\s', '_', tag).strip()
            tag = re.sub(r':', '_', tag).strip()
            return tag
        return ''
def get_darcs_patches(darcs_repo):
    """Return the patches of DARCS_REPO as a list of DarcsPatch objects.

    The list follows the order of `darcs changes --xml --reverse`, and
    each patch's ``number`` is its index in that list.
    """
    progress('reading patches.')

    changes_xml = read_pipe('darcs changes --xml --reverse --repo ' + darcs_repo)
    document = xml.dom.minidom.parseString(changes_xml)
    nodes = document.documentElement.getElementsByTagName('patch')

    patches = [DarcsPatch(node, darcs_repo) for node in nodes]
    for index, patch in enumerate(patches):
        patch.number = index

    return patches
518 ################################################################
519 # GIT export
class GitCommit:
    """Node of the exported commit graph.

    Links a darcs patch to the commit chosen as its parent and records
    its depth, i.e. the number of parent links down to a root (0 for a
    commit without parent).
    """

    def __init__(self, parent, darcs_patch):
        self.parent = parent
        self.darcs_patch = darcs_patch
        self.depth = (parent.depth + 1) if parent else 0

    def number(self):
        """Return the number of the underlying darcs patch."""
        return self.darcs_patch.number

    def parent_patch(self):
        """Return the darcs patch of the parent commit, or None for a root."""
        if not self.parent:
            return None
        return self.parent.darcs_patch
def common_ancestor(a, b):
    """Return the deepest node that is an ancestor of both A and B.

    A node counts as its own ancestor.  Returns None when the two parent
    chains never meet.
    """
    # Climb from the deeper node until both sit at the same depth.
    while a.depth != b.depth:
        if a.depth < b.depth:
            b = b.parent
        else:
            a = a.parent

    # Then climb in lockstep until the chains meet or run out.
    while a and b:
        if a == b:
            return a
        a, b = a.parent, b.parent

    return None
def export_checkpoint(gfi):
    """Write a fast-import 'checkpoint' command, plus a blank line, to GFI."""
    command = 'checkpoint\n\n'
    gfi.write(command)
def export_tree(tree, gfi):
    """Write the directory TREE to GFI as the file list of a commit.

    Emits 'deleteall', then one inline 'M 644' entry per file found under
    TREE (path relative to TREE), then a blank line.
    """
    tree = os.path.normpath(tree)
    prefix = tree + '/'
    gfi.write('deleteall\n')
    for (root, dirs, files) in os.walk(tree):
        for f in files:
            rf = os.path.normpath(os.path.join(root, f))
            handle = open(rf)
            try:
                s = handle.read()
            finally:
                handle.close()

            # Cut off the leading tree prefix only.  str.replace() would
            # also drop the same text when it recurs deeper in the path.
            if rf.startswith(prefix):
                rf = rf[len(prefix):]

            gfi.write('M 644 inline %s\n' % rf)
            gfi.write('data %d\n%s\n' % (len(s), s))
    gfi.write('\n')
def export_commit(repo, patch, last_patch, gfi):
    """Write PATCH to GFI as a commit of REPO's current pristine tree.

    The commit goes to the temporary branch refs/heads/darcstmp<N> and is
    marked :<N+1>, N being the patch number.  Its parents are the pending
    patches that REPO already contains (these stop being pending); when
    there are none, LAST_PATCH is used if it has been exported.  PATCH
    then becomes pending itself and is recorded in git_commits.
    """
    gfi.write('commit refs/heads/darcstmp%d\n' % patch.number)
    gfi.write('mark :%d\n' % (patch.number + 1))
    gfi.write('committer %s <%s> %s\n' % (patch.author_name,
                                         patch.author_mail,
                                         patch.date))

    msg = patch.message
    if options.debug:
        msg += '\n\n#%d\n' % patch.number

    gfi.write('data %d\n%s\n' % (len(msg), msg))

    mergers = []
    # sorted() yields a snapshot, so entries can be deleted while looping,
    # and it makes the choice of the first parent reproducible.
    for (n, p) in sorted(pending_patches.items()):
        if repo.has_patch(p):
            mergers.append(n)
            del pending_patches[n]

    if (last_patch
            and mergers == []
            and last_patch.number in git_commits):
        mergers = [last_patch.number]

    if mergers:
        gfi.write('from :%d\n' % (mergers[0] + 1))
        for m in mergers[1:]:
            gfi.write('merge :%d\n' % (m + 1))

    pending_patches[patch.number] = patch
    export_tree(repo.pristine_tree(), gfi)

    n = -1
    if last_patch:
        n = last_patch.number
    git_commits[patch.number] = GitCommit(git_commits.get(n, None),
                                          patch)
def export_pending(gfi):
    """Create the final branch(es) from the patches that are still pending.

    A single pending patch becomes refs/heads/master.  Several pending
    patches each get a branch master<N> and are joined by a 'tie
    together' merge commit on master.  Nothing is written when no patch
    is pending.
    """
    if not pending_patches:
        progress("No patches were exported; not creating branch master")
        return

    # Sorted by patch number so that the output is reproducible.
    pending = sorted(pending_patches.items())

    if len(pending) == 1:
        gfi.write('reset refs/heads/master\n')
        gfi.write('from :%d\n\n' % (pending[0][1].number + 1))

        progress("Creating branch master")
        return

    for (n, p) in pending:
        gfi.write('reset refs/heads/master%d\n' % n)
        gfi.write('from :%d\n\n' % (n + 1))

        progress("Creating branch master%d" % n)

    patches = [p for (n, p) in pending]
    patch = patches[0]
    gfi.write('commit refs/heads/master\n')
    gfi.write('committer %s <%s> %s\n' % (patch.author_name,
                                         patch.author_mail,
                                         patch.date))
    msg = 'tie together'
    gfi.write('data %d\n%s\n' % (len(msg), msg))
    gfi.write('from :%d\n' % (patch.number + 1))
    for p in patches[1:]:
        gfi.write('merge :%d\n' % (p.number + 1))
    gfi.write('\n')
def export_tag(patch, gfi):
    """Write a fast-import 'tag' command for PATCH to GFI.

    The tag is named PATCH.tag_name(), points at mark :<number + 1> and
    carries the patch's author, date and message.
    """
    header = [
        'tag %s\n' % patch.tag_name(),
        'from :%d\n' % (patch.number + 1),
        'tagger %s <%s> %s\n' % (patch.author_name,
                                 patch.author_mail,
                                 patch.date),
    ]
    for line in header:
        gfi.write(line)

    gfi.write('data %d\n%s\n' % (len(patch.message), patch.message))
650 ################################################################
651 # main.
def test_conversion(darcs_repo, git_repo):
    """Compare a checkout of GIT_REPO with the pristine tree of DARCS_REPO.

    Clones GIT_REPO into a temporary directory, diffs it against
    DARCS_REPO/_darcs/pristine and removes the clone again.  Exits with
    status 1 when the diff is not empty.
    """
    gd = options.basename + '.checkouttmp.git'
    system('rm -rf %s && git clone %s %s' % (gd, git_repo, gd))
    diff = read_pipe('diff --exclude .git -urN %s %s/_darcs/pristine' % (gd, darcs_repo),
                     ignore_errors=True)
    system('rm -rf %s' % gd)

    if not diff:
        progress("Checkout matches pristine darcs tree.")
        return

    progress("Conversion introduced changes: %s" % diff)
    sys.exit(1)
def main():
    """Convert the darcs repo named on the command line into a git repo.

    For every patch, in order, a parent is searched from which the patch
    pulls without conflict: first the common ancestors of the pending
    patches (newest first), then earlier patches one by one.  The
    resulting tree is streamed to `git fast-import`.  Afterwards the
    final branches are created, the temporary refs are removed and the
    result is compared with the darcs pristine tree.

    Raises CommandFailed when a shell command or fast-import fails.
    """
    (options, args) = get_cli_options()

    darcs_repo = os.path.abspath(args[0])
    git_repo = os.path.abspath(options.target_git_repo)

    if os.path.exists(git_repo):
        system('rm -rf %s' % git_repo)

    system('mkdir %s && cd %s && git --bare init' % (git_repo, git_repo))
    # `git config` is the name `git repo-config` has carried since git 1.5.0.
    system('git --git-dir %s config core.logAllRefUpdates false' % git_repo)

    os.environ['GIT_DIR'] = git_repo

    # The dashed `git-fast-import` is no longer installed on PATH by
    # default; the `git fast-import` form works in old and new versions.
    gfi = os.popen('git fast-import --quiet', 'w')

    patches = get_darcs_patches(darcs_repo)
    conv_repo = DarcsConversionRepo(options.basename + ".tmpdarcs", patches)
    conv_repo.start_at(-1)

    for p in patches:

        parent_patch = None
        parent_number = -1

        # Common ancestors of all pairs of pending patches, keyed by patch
        # number: each ancestor is tried once, and pairs without a common
        # ancestor (None) are skipped.
        pending = list(pending_patches.values())
        ancestors = {}
        for v in pending:
            for w in pending:
                a = common_ancestor(git_commits[v.number], git_commits[w.number])
                if a is not None:
                    ancestors[a.darcs_patch.number] = a

        for number in sorted(ancestors, reverse=True):
            q = ancestors[number].darcs_patch
            try:
                conv_repo.go_from_to(q, p)

                parent_patch = q
                parent_number = q.number
                progress('Found existing common parent as predecessor')
                break

            except PullConflict:
                pass

        ## no branches found where we could attach.
        ## try previous commits one by one.
        if not parent_patch:
            parent_number = p.number - 2
            while 1:
                if parent_number >= 0:
                    parent_patch = patches[parent_number]

                try:
                    conv_repo.go_from_to(parent_patch, p)
                    break
                except PullConflict:
                    ## simplistic, may not be enough.
                    progress('conflict, going one back')
                    parent_number -= 1

                if parent_number < 0:
                    break

                if (options.history_window
                        and parent_number < p.number - options.history_window):

                    # -2 makes the export test below fail for this patch.
                    parent_number = -2
                    break

        if parent_number >= 0 or p.number == 0:
            progress('Export %d -> %d (total %d)' % (parent_number,
                                                     p.number, len(patches)))
            export_commit(conv_repo, p, parent_patch, gfi)
            if p.tag_name():
                export_tag(p, gfi)

            if options.checkpoint_frequency and p.number % options.checkpoint_frequency == 0:
                export_checkpoint(gfi)
        else:
            progress("Can't import patch %d, need conflict resolution patch?" % p.number)

    export_pending(gfi)
    # close() returns the exit status of fast-import; treat a failure like
    # read_pipe() and system() do.
    if gfi.close():
        raise CommandFailed("Command failed: git fast-import")

    for f in glob.glob('%s/refs/heads/darcstmp*' % git_repo):
        os.unlink(f)

    test_conversion(darcs_repo, git_repo)

    if not options.debug:
        conv_repo.clean()
# Run the conversion only when executed as a script, so that importing
# the module has no side effects.
if __name__ == '__main__':
    main()