Merge branch 'fc/remote-hg'
[git/mingw.git] / contrib / remote-helpers / git-remote-hg
blob548133121d23a328d76d68d058fd4fb14b60fb16
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 # For remote repositories a local clone is stored in
13 # "$GIT_DIR/hg/origin/clone/.hg/".
15 from mercurial import hg, ui, bookmarks, context, util, encoding, node, error
17 import re
18 import sys
19 import os
20 import json
21 import shutil
22 import subprocess
23 import urllib
24 import atexit
27 # If you want to switch to hg-git compatibility mode:
28 # git config --global remote-hg.hg-git-compat true
30 # If you are not in hg-git-compat mode and want to disable the tracking of
31 # named branches:
32 # git config --global remote-hg.track-branches false
34 # If you don't want to force pushes (and thus risk creating new remote heads):
35 # git config --global remote-hg.force-push false
# If you want the equivalent of hg's clone/pull --insecure option:
38 # git config remote-hg.insecure true
40 # git:
41 # Sensible defaults for git.
42 # hg bookmarks are exported as git branches, hg branches are prefixed
43 # with 'branches/', HEAD is a special case.
45 # hg:
46 # Emulate hg-git.
47 # Only hg bookmarks are exported as git branches.
48 # Commits are modified to preserve hg information and allow bidirectionality.
# Patterns used to split a user string into its name and e-mail parts.
# Raw strings keep regex escapes such as \w and \d from being treated as
# (invalid) string escapes.
NAME_RE = re.compile(r'^([^<>]+)')
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# Matches an identity line of the form "<word> [<name> ]<<email>> <date> <tz>".
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an 'ERROR: ...' line to stderr and exit with status 1.

    msg is %-formatted with args only when args are supplied, so a message
    that was already formatted by the caller and happens to contain a literal
    '%' is reported verbatim instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write a 'WARNING: ...' line to stderr.

    msg is %-formatted with args only when args are supplied, so a message
    that already contains a literal '%' does not raise a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gitmode(flags):
    """Return the git file mode string for the given file flags.

    'l' in flags yields '120000', otherwise 'x' yields '100755', and
    anything else yields '100644'.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format a timezone offset in seconds as a '+HHMM' / '-HHMM' string.

    The sign of tz is inverted before formatting (tz=-3600 gives '+0100').
    Sign and magnitude are handled separately: flooring a negative offset
    would otherwise produce the wrong hour and minute fields for offsets that
    are not a whole number of hours (e.g. 12600 must give '-0330').
    """
    offset = -tz
    sign = '-' if offset < 0 else '+'
    hours, remainder = divmod(abs(offset), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Return the file flag for a git mode string.

    '100755' maps to 'x', '120000' maps to 'l'; every other mode maps to
    the empty string.
    """
    if mode == '100755':
        return 'x'
    if mode == '120000':
        return 'l'
    return ''
def hghex(node):
    """Return hg.node.hex() applied to node."""
    to_hex = hg.node.hex
    return to_hex(node)
def get_config(config):
    """Run `git config --get <config>` and return whatever it wrote to stdout.

    The output is returned unmodified, so a set value still carries its
    trailing newline.
    """
    proc = subprocess.Popen(['git', 'config', '--get', config],
                            stdout=subprocess.PIPE)
    stdout_data = proc.communicate()[0]
    return stdout_data
class Marks:
    """Persistent bookkeeping of revision <-> mark pairs and per-ref tips.

    The state is stored as JSON at `path` with the keys 'tips', 'marks'
    (revision, as a string, to mark) and 'last-mark'.  The reverse mapping
    (mark to integer revision) is kept in memory only and rebuilt on load.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read the state from self.path; do nothing if the file is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on the
        # garbage collector.
        with open(self.path) as marks_file:
            tmp = json.load(marks_file)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serializable state as a plain dictionary."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        # The with-block guarantees the data is flushed and the file closed.
        with open(self.path, 'w') as marks_file:
            json.dump(self.dict(), marks_file)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for rev (KeyError if there is none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for mark (KeyError if there is none)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark number for rev and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse mapping in sync so to_rev() can resolve marks
        # allocated during this run, exactly as it could after a reload.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for rev and make it the last mark."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if rev already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the tip stored for branch, or 0 when none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Store tip as the tip of branch."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    The current (stripped) line is kept in self.line; the helper methods
    inspect or parse that line.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and return it stripped of whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, reading ahead after each, until one equals separator."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating the parser yields lines up to the next empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a '+HHMM' / '-HHMM' offset into signed seconds.

        The sign is split off first: floor division and modulo on a negative
        number would turn e.g. '-0330' into -4 hours plus 70 minutes.
        """
        tz = int(tz)
        sign = -1 if tz < 0 else 1
        hours, minutes = divmod(abs(tz), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current line as an identity line.

        Returns (user, date, tz) where date is an int and tz is the offset
        in seconds with its sign inverted, or None if the line does not match
        RAW_AUTHOR_RE.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Split a trailing " ext:(...)" suffix off the name and unquote it.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder address means there was no real e-mail.
            user = name

        if ex:
            user += ex

        tz = self._tz_to_seconds(tz)
        return (user, int(date), -tz)
def fix_file_path(path):
    """Return path unchanged if it is relative, else made relative to '/'."""
    if os.path.isabs(path):
        return os.path.relpath(path, '/')
    return path
213 def export_file(fc):
214 d = fc.data()
215 path = fix_file_path(fc.path())
216 print "M %s inline %s" % (gitmode(fc.flags()), path)
217 print "data %d" % len(d)
218 print d
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of ctx with the manifest of repo[parent].

    Returns a pair of sets: (files added or modified, files removed).
    A file counts as modified when its flags or its manifest entry differ.
    """
    current = ctx.manifest()
    # Work on a copy: entries are deleted as they are matched, so whatever is
    # left at the end exists only in the parent.
    leftover = repo[parent].manifest().copy()

    added = set()
    modified = set()

    for name in current:
        if name not in leftover:
            added.add(name)
            continue
        if current.flags(name) != leftover.flags(name) or current[name] != leftover[name]:
            modified.add(name)
        del leftover[name]

    removed = set(leftover.keys())
    return added | modified, removed
def fixup_user_git(user):
    """Split user into (name, mail) after dropping double quotes.

    A string matching AUTHOR_RE yields both parts; otherwise only the name
    is taken from NAME_RE and mail is None.  Parts that cannot be determined
    are None.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split user into (name, mail) using AUTHOR_HG_RE.

    Text following the closing '>' is URL-quoted and appended to the name
    as ' ext:(...)'.  When the string does not match at all, the sanitized
    string is the name, and also the mail if it contains an '@'.
    """
    def sanitize(name):
        # stole this from hg-git
        trimmed = name.lstrip('< ').rstrip('> ')
        return re.sub('[<>\n]', '?', trimmed)

    parsed = AUTHOR_HG_RE.match(user)
    if not parsed:
        name = sanitize(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = sanitize(parsed.group(1))
    mail = sanitize(parsed.group(2))
    trailing = parsed.group(3)
    if trailing:
        name += ' ext:(' + urllib.quote(trailing) + ')'

    return (name, mail)
def fixup_user(user):
    """Return user normalized to the form 'name <mail>'.

    The split is done by fixup_user_git() in 'git' mode and fixup_user_hg()
    otherwise; a missing name or mail is replaced by bad_name / bad_mail.
    """
    global mode, bad_mail

    splitter = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = splitter(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Open the repository at url and return it.

    A local url is opened directly.  For anything else a clone is kept under
    <dirname>/clone: it is created on first use and otherwise refreshed by
    pulling from the peer, which is also stored in the global `peer`.
    Failing to reach the remote ends the program through die().
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')
    # Send mercurial's own output to stderr so it stays out of stdout.
    myui.fout = sys.stderr

    try:
        if get_config('remote-hg.insecure') == 'true\n':
            myui.setconfig('web', 'cacerts', '')
    except subprocess.CalledProcessError:
        pass

    if hg.islocal(url):
        repo = hg.repository(myui, url)
    else:
        local_path = os.path.join(dirname, 'clone')
        if not os.path.exists(local_path):
            try:
                peer, dstpeer = hg.clone(myui, {}, url, local_path, update=True, pull=True)
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit are not reported as a repository problem.
                die('Repository error')
            repo = dstpeer.local()
        else:
            repo = hg.repository(myui, local_path)
            try:
                peer = hg.peer(myui, {}, url)
            except Exception:
                die('Repository error')
            repo.pull(peer, heads=None, force=True)

    return repo
def rev_to_mark(rev):
    """Return the mark that the global marks table holds for rev."""
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Return the revision that the global marks table holds for mark."""
    rev = marks.to_rev(mark)
    return rev
329 def export_ref(repo, name, kind, head):
330 global prefix, marks, mode
332 ename = '%s/%s' % (kind, name)
333 tip = marks.get_tip(ename)
335 # mercurial takes too much time checking this
336 if tip and tip == head.rev():
337 # nothing to do
338 return
339 revs = xrange(tip, head.rev() + 1)
340 count = 0
342 revs = [rev for rev in revs if not marks.is_marked(rev)]
344 for rev in revs:
346 c = repo[rev]
347 (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
348 rev_branch = extra['branch']
350 author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
351 if 'committer' in extra:
352 user, time, tz = extra['committer'].rsplit(' ', 2)
353 committer = "%s %s %s" % (user, time, gittz(int(tz)))
354 else:
355 committer = author
357 parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]
359 if len(parents) == 0:
360 modified = c.manifest().keys()
361 removed = []
362 else:
363 modified, removed = get_filechanges(repo, c, parents[0])
365 desc += '\n'
367 if mode == 'hg':
368 extra_msg = ''
370 if rev_branch != 'default':
371 extra_msg += 'branch : %s\n' % rev_branch
373 renames = []
374 for f in c.files():
375 if f not in c.manifest():
376 continue
377 rename = c.filectx(f).renamed()
378 if rename:
379 renames.append((rename[0], f))
381 for e in renames:
382 extra_msg += "rename : %s => %s\n" % e
384 for key, value in extra.iteritems():
385 if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
386 continue
387 else:
388 extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))
390 if extra_msg:
391 desc += '\n--HG--\n' + extra_msg
393 if len(parents) == 0 and rev:
394 print 'reset %s/%s' % (prefix, ename)
396 print "commit %s/%s" % (prefix, ename)
397 print "mark :%d" % (marks.get_mark(rev))
398 print "author %s" % (author)
399 print "committer %s" % (committer)
400 print "data %d" % (len(desc))
401 print desc
403 if len(parents) > 0:
404 print "from :%s" % (rev_to_mark(parents[0]))
405 if len(parents) > 1:
406 print "merge :%s" % (rev_to_mark(parents[1]))
408 for f in modified:
409 export_file(c.filectx(f))
410 for f in removed:
411 print "D %s" % (fix_file_path(f))
412 print
414 count += 1
415 if (count % 100 == 0):
416 print "progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs))
417 print "#############################################################"
419 # make sure the ref is updated
420 print "reset %s/%s" % (prefix, ename)
421 print "from :%u" % rev_to_mark(rev)
422 print
424 marks.set_tip(ename, rev)
def export_tag(repo, tag):
    """Export tag as a ref of kind 'tags' whose head is repo[tag]."""
    tagged = repo[tag]
    export_ref(repo, tag, 'tags', tagged)
def export_bookmark(repo, bmark):
    """Export bmark as a ref of kind 'bookmarks', head taken from bmarks."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export branch as a ref of kind 'branches', head from get_branch_tip()."""
    branch_tip = get_branch_tip(repo, branch)
    export_ref(repo, branch, 'branches', repo[branch_tip])
def export_head(repo):
    """Export the (name, node) pair held in g_head as a bookmark ref."""
    global g_head
    head_name = g_head[0]
    head_node = g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
442 def do_capabilities(parser):
443 global prefix, dirname
445 print "import"
446 print "export"
447 print "refspec refs/heads/branches/*:%s/branches/*" % prefix
448 print "refspec refs/heads/*:%s/bookmarks/*" % prefix
449 print "refspec refs/tags/*:%s/tags/*" % prefix
451 path = os.path.join(dirname, 'marks-git')
453 if os.path.exists(path):
454 print "*import-marks %s" % path
455 print "*export-marks %s" % path
457 print
def get_branch_tip(repo, branch):
    """Return the tip of branch, or None if no heads are recorded for it.

    The heads come from the global `branches` table.  With several heads a
    warning is printed and repo.branchtip() is used when the repository
    object has it; otherwise the first recorded head is returned.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Pass the name as an argument so that a '%' inside it cannot break
        # the formatting done by warn().
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
475 def list_head(repo, cur):
476 global g_head, bmarks
478 head = bookmarks.readcurrent(repo)
479 if head:
480 node = repo[head]
481 else:
482 # fake bookmark from current branch
483 head = cur
484 node = repo['.']
485 if not node:
486 node = repo['tip']
487 if not node:
488 return
489 if head == 'default':
490 head = 'master'
491 bmarks[head] = node
493 print "@refs/heads/%s HEAD" % head
494 g_head = (head, node)
496 def do_list(parser):
497 global branches, bmarks, mode, track_branches
499 repo = parser.repo
500 for bmark, node in bookmarks.listbookmarks(repo).iteritems():
501 bmarks[bmark] = repo[node]
503 cur = repo.dirstate.branch()
505 list_head(repo, cur)
507 if track_branches:
508 for branch in repo.branchmap():
509 heads = repo.branchheads(branch)
510 if len(heads):
511 branches[branch] = heads
513 for branch in branches:
514 print "? refs/heads/branches/%s" % branch
516 for bmark in bmarks:
517 print "? refs/heads/%s" % bmark
519 for tag, node in repo.tagslist():
520 if tag == 'tip':
521 continue
522 print "? refs/tags/%s" % tag
524 print
526 def do_import(parser):
527 repo = parser.repo
529 path = os.path.join(dirname, 'marks-git')
531 print "feature done"
532 if os.path.exists(path):
533 print "feature import-marks=%s" % path
534 print "feature export-marks=%s" % path
535 sys.stdout.flush()
537 tmp = encoding.encoding
538 encoding.encoding = 'utf-8'
540 # lets get all the import lines
541 while parser.check('import'):
542 ref = parser[1]
544 if (ref == 'HEAD'):
545 export_head(repo)
546 elif ref.startswith('refs/heads/branches/'):
547 branch = ref[len('refs/heads/branches/'):]
548 export_branch(repo, branch)
549 elif ref.startswith('refs/heads/'):
550 bmark = ref[len('refs/heads/'):]
551 export_bookmark(repo, bmark)
552 elif ref.startswith('refs/tags/'):
553 tag = ref[len('refs/tags/'):]
554 export_tag(repo, tag)
556 parser.next()
558 encoding.encoding = tmp
560 print 'done'
def parse_blob(parser):
    """Read one blob (mark line, then data) and store it in blob_marks."""
    global blob_marks

    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add to files the files of repo[p1] that are not already listed.

    Only files still present in the manifest of repo[p1] are added; each is
    stored as {'ctx': <file context>}.  p2 is not used.
    """
    for path in repo[p1].files():
        if path in files:
            continue
        if path not in repo[p1].manifest():
            continue
        files[path] = { 'ctx' : repo[p1][path] }
def parse_commit(parser):
    """Read one commit from the stream and commit it to the repository.

    Parses the mark, author, committer, message, optional parents and the
    file commands, builds a memctx from them and commits it.  The resulting
    node is stored in parsed_refs under the ref and the revision is recorded
    in marks under the commit's mark.
    """
    global marks, blob_marks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # Pass the line as an argument so that a '%' inside it cannot
            # break the formatting done by die().
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx: returns the file context for path f,
        # raising IOError for files that were deleted.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    # Check if the ref is supposed to be a named branch
    if ref.startswith('refs/heads/branches/'):
        extra['branch'] = ref[len('refs/heads/branches/'):]

    if mode == 'hg':
        # Recover the metadata stored below the '--HG--' marker and strip it
        # from the message.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'
    try:
        node = repo.commitctx(ctx)
    finally:
        # Restore the previous encoding even when the commit fails.
        encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Handle a 'reset <ref>' command.

    A reset immediately followed by a commit is delegated to parse_commit().
    Otherwise, when a 'from' line follows, the node behind its mark is
    stored in parsed_refs under the ref.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        node_of = parser.repo.changelog.node
        parsed_refs[ref] = node_of(mark_to_rev(from_mark))
def parse_tag(parser):
    """Consume one tag command from the stream; the parsed values are unused."""
    tag_name = parser[1]
    parser.next()
    tag_from = parser.get_mark()
    parser.next()
    tag_tagger = parser.get_author()
    parser.next()
    tag_message = parser.get_data()
    parser.next()

    # nothing to do
721 def do_export(parser):
722 global parsed_refs, bmarks, peer
724 p_bmarks = []
726 parser.next()
728 for line in parser.each_block('done'):
729 if parser.check('blob'):
730 parse_blob(parser)
731 elif parser.check('commit'):
732 parse_commit(parser)
733 elif parser.check('reset'):
734 parse_reset(parser)
735 elif parser.check('tag'):
736 parse_tag(parser)
737 elif parser.check('feature'):
738 pass
739 else:
740 die('unhandled export command: %s' % line)
742 for ref, node in parsed_refs.iteritems():
743 if ref.startswith('refs/heads/branches'):
744 print "ok %s" % ref
745 elif ref.startswith('refs/heads/'):
746 bmark = ref[len('refs/heads/'):]
747 p_bmarks.append((bmark, node))
748 continue
749 elif ref.startswith('refs/tags/'):
750 tag = ref[len('refs/tags/'):]
751 if mode == 'git':
752 msg = 'Added tag %s for changeset %s' % (tag, hghex(node[:6]));
753 parser.repo.tag([tag], node, msg, False, None, {})
754 else:
755 parser.repo.tag([tag], node, None, True, None, {})
756 print "ok %s" % ref
757 else:
758 # transport-helper/fast-export bugs
759 continue
761 if peer:
762 parser.repo.push(peer, force=force_push)
764 # handle bookmarks
765 for bmark, node in p_bmarks:
766 ref = 'refs/heads/' + bmark
767 new = hghex(node)
769 if bmark in bmarks:
770 old = bmarks[bmark].hex()
771 else:
772 old = ''
774 if bmark == 'master' and 'master' not in parser.repo._bookmarks:
775 # fake bookmark
776 pass
777 elif bookmarks.pushbookmark(parser.repo, bmark, old, new):
778 # updated locally
779 pass
780 else:
781 print "error %s" % ref
782 continue
784 if peer:
785 if not peer.pushkey('bookmarks', bmark, old, new):
786 print "error %s" % ref
787 continue
789 print "ok %s" % ref
791 print
def fix_path(alias, repo, orig_url):
    """Rewrite remote.<alias>.url when the repository url differs from orig_url.

    Both urls are compared as strings after passing through util.url(); on a
    mismatch `git config` is run to set the url to 'hg::<repository url>'.
    """
    repo_url = util.url(repo.url())
    requested_url = util.url(orig_url)
    if str(requested_url) != str(repo_url):
        subprocess.call(['git', 'config', 'remote.%s.url' % alias, "hg::%s" % repo_url])
def main(args):
    """Entry point: set up the global state and serve commands from stdin.

    args[1] is the remote alias and args[2] the url.  After reading the
    remote-hg.* configuration and opening the repository, the commands
    'capabilities', 'list', 'import' and 'export' are dispatched until an
    empty line is read; any other command ends the program through die().
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches, force_push, is_tmp

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    force_push = True

    try:
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
        if get_config('remote-hg.force-push') == 'false\n':
            force_push = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # NOTE(review): alias[4:] presumably strips an 'hg::' prefix, i.e. the
    # remote was given as a bare url rather than a configured name - confirm.
    if alias[4:] == url:
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}
    marks = None

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # Pass the line as an argument so that a '%' inside it cannot
            # break the formatting done by die().
            die('unhandled command: %s', line)
        sys.stdout.flush()
def bye():
    """Exit handler: store the marks, or delete the temporary directory.

    The module globals are looked up defensively because this can run after
    main() failed before assigning them; in that case there is nothing to do.
    """
    state = globals()
    current_marks = state.get('marks')
    if not current_marks:
        return
    if not state.get('is_tmp'):
        current_marks.store()
    else:
        shutil.rmtree(state['dirname'])
# Register the exit handler first so it also runs when main() exits early.
atexit.register(bye)
exit_status = main(sys.argv)
sys.exit(exit_status)