remote-hg: add branch_tip() helper
[git.git] / contrib / remote-helpers / git-remote-hg
blob bd93f82857e624955614625f6803f6c160f29445
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 # For remote repositories a local clone is stored in
13 # "$GIT_DIR/hg/origin/clone/.hg/".
15 from mercurial import hg, ui, bookmarks, context, util, encoding, node, error
17 import re
18 import sys
19 import os
20 import json
21 import shutil
22 import subprocess
23 import urllib
24 import atexit
25 import urlparse
28 # If you want to switch to hg-git compatibility mode:
29 # git config --global remote-hg.hg-git-compat true
31 # If you are not in hg-git-compat mode and want to disable the tracking of
32 # named branches:
33 # git config --global remote-hg.track-branches false
35 # If you don't want to force pushes (and thus risk creating new remote heads):
36 # git config --global remote-hg.force-push false
38 # If you want the equivalent of hg's clone/pull --insecure option:
39 # git config remote-hg.insecure true
41 # git:
42 # Sensible defaults for git.
43 # hg bookmarks are exported as git branches, hg branches are prefixed
44 # with 'branches/', HEAD is a special case.
46 # hg:
47 # Emulate hg-git.
48 # Only hg bookmarks are exported as git branches.
49 # Commits are modified to preserve hg information and allow bidirectionality.
# Patterns used to pick identities apart.  Raw strings keep the regex escapes
# (\w, \d) out of Python's own string-escape processing.

# Leading run of characters up to the first '<' or '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by a well-formed '<mail>' at the end of the string.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Lenient form: the closing '>' may be missing, and any text after it is
# captured as a third group.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# Stream identity line: "<word> [name ]<mail> <digits> <+/-digits>".
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an 'ERROR: ...' line to stderr and exit with status 1.

    ``msg`` is %-formatted with ``args`` only when arguments were supplied,
    so an already-formatted message that contains a literal '%' is reported
    verbatim instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % (msg,))
    sys.exit(1)
def warn(msg, *args):
    """Write a 'WARNING: ...' line to stderr.

    ``msg`` is %-formatted with ``args`` only when arguments were supplied,
    so an already-formatted message that contains a literal '%' (for example
    one built from a branch name) is printed verbatim.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % (msg,))
def gitmode(flags):
    """Translate file flags into a git file mode string.

    An 'l' flag takes precedence over 'x'; with neither flag the regular
    file mode is returned.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format an offset in seconds as a git '+HHMM' / '-HHMM' string.

    The sign is flipped: a positive ``tz`` yields a '-' offset and a
    non-positive one yields '+'.  The magnitude is split into hours and
    minutes from the absolute value, so offsets that are not a whole number
    of hours come out right for both signs (floor division on the negated
    value used to turn 12600 into '-0430' instead of '-0330').
    """
    sign = '-' if tz > 0 else '+'
    hours, seconds = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, seconds // 60)
def hgmode(mode):
    """Translate a git file mode string into a one-letter flag.

    '100755' maps to 'x', '120000' maps to 'l'; any other mode maps to ''.
    """
    if mode == '100755':
        return 'x'
    if mode == '120000':
        return 'l'
    return ''
def hghex(node):
    """Return ``node`` converted by Mercurial's ``hg.node.hex``."""
    to_hex = hg.node.hex
    return to_hex(node)
def get_config(config):
    """Run ``git config --get <config>`` and return its stdout unmodified.

    Callers compare the result against values such as 'true\\n', i.e. the
    trailing newline is not stripped here.
    """
    git = subprocess.Popen(['git', 'config', '--get', config],
                           stdout=subprocess.PIPE)
    captured = git.communicate()
    return captured[0]
class Marks:
    """Persistent bookkeeping of revision <-> mark associations.

    State is stored as JSON at ``path`` under three keys: 'tips'
    (name -> tip), 'marks' (str(rev) -> mark) and 'last-mark' (the most
    recently assigned mark).  ``rev_marks`` is the in-memory reverse map
    (mark -> int rev) rebuilt from 'marks' on load.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read state from ``self.path``; a missing file leaves defaults."""
        if not os.path.exists(self.path):
            return

        # Close the handle deterministically instead of leaking it.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings, so revisions are converted back.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serializable state."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to ``self.path`` as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for ``rev`` (KeyError if unknown)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for ``mark`` (KeyError if unknown)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for ``rev`` and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen ``mark`` for ``rev``."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the stored tip for ``branch``, or 0 when none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    ``self.line`` always holds the current (stripped) line; the helper
    methods interpret that line.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        # i-th whitespace-separated word of the current line
        return self.line.split()[i]

    def check(self, word):
        return self.line.startswith(word)

    def each_block(self, separator):
        # Yield lines until the current line equals ``separator``.  A new
        # line is read only after the consumer resumes the generator.
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        return self.each_block('')

    def next(self):
        # Advance one line; a literal 'done' line is represented as None.
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        # Integer following the first ':' on the current line.
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        # For a 'data <size>' line, read exactly <size> characters from
        # stdin; any other line yields None.
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        # Convert a '+HHMM' / '-HHMM' offset into signed seconds.  The
        # magnitude is split from the absolute value so that negative
        # offsets with minutes (e.g. '-0330') are not skewed by floor
        # division and modulo.
        offset = int(tz)
        hours, minutes = divmod(abs(offset), 100)
        seconds = hours * 3600 + minutes * 60
        if offset < 0:
            return -seconds
        return seconds

    def get_author(self):
        """Parse the current identity line.

        Returns ``(user, timestamp, tz)`` where ``tz`` is the negated
        offset in seconds, or None when the line does not match.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Undo the ' ext:(...)' suffix encoding of trailing user text.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # placeholder address: keep only the name
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_to_seconds(tz))
def fix_file_path(path):
    """Return ``path`` as-is when relative, otherwise relative to '/'."""
    if os.path.isabs(path):
        return os.path.relpath(path, '/')
    return path
def export_file(fc):
    """Print an inline 'M' command for file context ``fc``.

    Output is the 'M <mode> inline <path>' line, a 'data <length>' line and
    then the file contents.
    """
    contents = fc.data()
    target = fix_file_path(fc.path())
    git_mode = gitmode(fc.flags())
    print("M %s inline %s" % (git_mode, target))
    print("data %d" % len(contents))
    print(contents)
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of ``ctx`` with that of ``repo[parent]``.

    Returns ``(changed, removed)``: ``changed`` is the set of files that are
    new or whose flags/content entry differ, ``removed`` the set of files
    present only in the parent manifest.
    """
    cur = ctx.manifest()
    prev = repo[parent].manifest()

    changed = set()
    for fn in cur:
        if fn not in prev:
            # added
            changed.add(fn)
        elif cur.flags(fn) != prev.flags(fn) or cur[fn] != prev[fn]:
            # modified
            changed.add(fn)

    removed = set(fn for fn in prev.keys() if fn not in cur)

    return changed, removed
def fixup_user_git(user):
    """Split ``user`` into ``(name, mail)`` using the strict patterns.

    Double quotes are dropped first.  If the string ends in a well-formed
    '<mail>' both parts come from AUTHOR_RE (name may be None); otherwise
    only a name is extracted via NAME_RE; otherwise both are None.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split ``user`` into ``(name, mail)`` using the lenient pattern.

    Text following the closing '>' is preserved by appending it to the name
    as a URL-quoted ' ext:(...)' suffix.  Without any '<', the whole
    sanitized string is the name, and also the mail if it contains '@'.
    """
    def scrub(text):
        # stole this from hg-git
        trimmed = text.lstrip('< ').rstrip('> ')
        return re.sub('[<>\n]', '?', trimmed)

    parsed = AUTHOR_HG_RE.match(user)
    if not parsed:
        name = scrub(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = scrub(parsed.group(1))
    mail = scrub(parsed.group(2))
    trailing = parsed.group(3)
    if trailing:
        name += ' ext:(' + urllib.quote(trailing) + ')'

    return (name, mail)
def fixup_user(user):
    """Return ``user`` normalized to 'name <mail>'.

    The splitter is chosen by the global ``mode``; an empty or missing name
    or mail is replaced by the global ``bad_name`` / ``bad_mail``.
    """
    splitter = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = splitter(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Open the Mercurial repository behind ``url`` and return it.

    Local URLs are opened directly.  For anything else a clone is kept under
    ``<dirname>/clone``: it is created on first use, otherwise reopened and
    updated with a forced pull.  In the non-local case the global ``peer``
    is set to the remote peer.  Failures to reach the remote end the
    process via die().
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')
    # keep Mercurial's own output off stdout
    myui.fout = sys.stderr

    try:
        if get_config('remote-hg.insecure') == 'true\n':
            myui.setconfig('web', 'cacerts', '')
    except subprocess.CalledProcessError:
        pass

    if hg.islocal(url):
        return hg.repository(myui, url)

    local_path = os.path.join(dirname, 'clone')
    if not os.path.exists(local_path):
        try:
            peer, dstpeer = hg.clone(myui, {}, url, local_path, update=True, pull=True)
        except Exception:
            # Only real errors are turned into die(); KeyboardInterrupt and
            # SystemExit are no longer swallowed by a bare except.
            die('Repository error')
        return dstpeer.local()

    repo = hg.repository(myui, local_path)
    try:
        peer = hg.peer(myui, {}, url)
    except Exception:
        die('Repository error')
    repo.pull(peer, heads=None, force=True)

    return repo
def rev_to_mark(rev):
    """Look up the mark recorded for ``rev`` in the global ``marks``."""
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision recorded for ``mark`` in the global ``marks``."""
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
    """Print the commands that bring ref ``kind/name`` up to ``head``.

    Every revision number from the ref's stored tip through ``head.rev()``
    that has no mark yet is written as a 'commit' command.  Afterwards the
    ref is reset to the mark of ``head`` and the stored tip is set to
    ``head.rev()``.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return

    revs = [r for r in xrange(tip, head_rev + 1) if not marks.is_marked(r)]
    count = 0

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            # stored as "<user> <time> <tz>"
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # root revision: everything in the manifest is new
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        desc += '\n'

        if mode == 'hg':
            # Append the metadata trailer after a '--HG--' separator.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.iteritems():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                else:
                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (fix_file_path(f)))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    # Use the head itself here rather than whatever the loop variable last
    # held: when the head was already marked (or nothing needed exporting)
    # the last exported revision is not the head, or is not defined at all.
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export the changeset ``repo[tag]`` under the 'tags' ref kind."""
    tagged = repo[tag]
    export_ref(repo, tag, 'tags', tagged)
def export_bookmark(repo, bmark):
    """Export the head stored in the global ``bmarks`` for ``bmark``."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export ``branch`` under the 'branches' ref kind, up to its tip."""
    head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', head)
def export_head(repo):
    """Export the ``(name, node)`` pair held in the global ``g_head``."""
    head_name, head_node = g_head[0], g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
    """Answer the 'capabilities' command.

    Prints the supported capabilities, the refspecs mapping refs into the
    private ``prefix`` namespace, and the marks file options; the block is
    terminated by an empty line.
    """
    global prefix, dirname

    for capability in ("import", "export"):
        print(capability)

    print("refspec refs/heads/branches/*:%s/branches/*" % prefix)
    print("refspec refs/heads/*:%s/bookmarks/*" % prefix)
    print("refspec refs/tags/*:%s/tags/*" % prefix)

    marks_file = os.path.join(dirname, 'marks-git')

    # importing marks is only advertised once the file exists
    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print("")
def branch_tip(repo, branch):
    """Return the tip of ``branch``.

    Uses ``repo.branchtip`` when the repository object has it and falls
    back to a ``repo.branchtags()`` lookup otherwise.
    """
    # older versions of mercurial don't have branchtip
    if not hasattr(repo, 'branchtip'):
        return repo.branchtags()[branch]
    return repo.branchtip(branch)
def get_branch_tip(repo, branch):
    """Return the head to export for ``branch``, or None if it has none.

    With a single known head that head is returned.  With several, a
    warning is printed and the result of branch_tip() is used.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if len(heads) <= 1:
        return heads[0]

    warn("Branch '%s' has more than one head, consider merging" % branch)
    return branch_tip(repo, branch)
def list_head(repo, cur):
    """Print the symbolic HEAD line and remember it in the global ``g_head``.

    The current bookmark is used when there is one.  Otherwise a bookmark is
    faked from branch name ``cur`` ('default' becomes 'master') pointing at
    repo['.'] or, failing that, repo['tip']; if neither is usable nothing is
    printed.
    """
    global g_head, bmarks

    current = bookmarks.readcurrent(repo)
    if current:
        head = current
        node = repo[current]
    else:
        # fake bookmark from current branch
        node = repo['.'] or repo['tip']
        if not node:
            return
        head = 'master' if cur == 'default' else cur
        bmarks[head] = node

    print("@refs/heads/%s HEAD" % head)
    g_head = (head, node)
def do_list(parser):
    """Answer the 'list' command.

    Fills the global ``bmarks`` (and ``branches`` when branch tracking is
    on) and prints one '? <ref>' line per branch, bookmark and tag, after
    the HEAD line; the block is terminated by an empty line.
    """
    global branches, bmarks, mode, track_branches

    repo = parser.repo
    for bmark_name, bmark_node in bookmarks.listbookmarks(repo).items():
        bmarks[bmark_name] = repo[bmark_node]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        for branch in repo.branchmap():
            heads = repo.branchheads(branch)
            if heads:
                branches[branch] = heads

        for branch in branches:
            print("? refs/heads/branches/%s" % branch)

    for bmark_name in bmarks:
        print("? refs/heads/%s" % bmark_name)

    for tag, _node in repo.tagslist():
        # 'tip' is not listed as a tag
        if tag != 'tip':
            print("? refs/tags/%s" % tag)

    print("")
def do_import(parser):
    """Answer a batch of 'import <ref>' commands.

    Prints the feature header, exports every requested ref with Mercurial's
    encoding temporarily forced to UTF-8, and finishes with 'done'.
    """
    repo = parser.repo

    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    branch_ns = 'refs/heads/branches/'
    head_ns = 'refs/heads/'
    tag_ns = 'refs/tags/'

    # let's get all the import lines
    while parser.check('import'):
        ref = parser[1]

        # the branch namespace must be tested before the more general
        # heads namespace
        if ref == 'HEAD':
            export_head(repo)
        elif ref.startswith(branch_ns):
            export_branch(repo, ref[len(branch_ns):])
        elif ref.startswith(head_ns):
            export_bookmark(repo, ref[len(head_ns):])
        elif ref.startswith(tag_ns):
            export_tag(repo, ref[len(tag_ns):])

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Consume a 'blob' command and store its data in ``blob_marks``.

    The line after 'blob' supplies the mark, the one after that the data
    header; the parser is advanced once more after the data is read.
    """
    global blob_marks

    parser.next()
    blob_id = parser.get_mark()
    parser.next()
    blob_marks[blob_id] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add to ``files`` the files touched by ``repo[p1]`` that it lacks.

    Each file listed by ``repo[p1].files()`` that is still in that
    changeset's manifest and not already a key of ``files`` is inserted as
    ``{'ctx': <file context>}``.  ``p2`` is not used.
    """
    first_parent = repo[p1]
    for path in first_parent.files():
        if path in files:
            continue
        if path not in first_parent.manifest():
            continue
        files[path] = { 'ctx' : first_parent[path] }
def parse_commit(parser):
    """Consume one 'commit' command and create the matching changeset.

    Reads the mark, author, committer, message, optional 'from'/'merge'
    parents and the file commands up to the next empty line, commits the
    result through an in-memory context, then records the new node under
    the ref in ``parsed_refs`` and its mark in ``marks``.
    """
    global marks, blob_marks, parsed_refs
    global mode

    from_mark = None
    merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    # path -> description of what happens to that file in this commit
    files = {}

    for line in parser:
        if parser.check('M'):
            _cmd, git_mode, mark_ref, path = line.split(' ', 3)
            blob_id = int(mark_ref[1:])
            files[path] = { 'mode' : hgmode(git_mode), 'data' : blob_marks[blob_id] }
        elif parser.check('D'):
            _cmd, path = line.split(' ', 1)
            files[path] = { 'deleted' : True }
        else:
            die('Unknown file command: %s' % line)

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx: produce the file context for path f.
        entry = files[f]
        if 'deleted' in entry:
            raise IOError
        if 'ctx' in entry:
            return entry['ctx']
        is_exec = entry['mode'] == 'x'
        is_link = entry['mode'] == 'l'
        return context.memfilectx(f, entry['data'],
                is_link, is_exec, entry.get('rename', None))

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    # 20 NUL bytes stand in for a missing parent
    p1 = '\0' * 20
    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))

    p2 = '\0' * 20
    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    # Check if the ref is supposed to be a named branch
    branch_ns = 'refs/heads/branches/'
    if ref.startswith(branch_ns):
        extra['branch'] = ref[len(branch_ns):]

    if mode == 'hg':
        separator = '\n--HG--\n'
        cut = data.find(separator)
        if cut >= 0:
            # Trailer lines look like "<key> : <value>".
            trailer = data[cut + len(separator):].strip()
            for entry in trailer.split('\n'):
                k, v = entry.split(' : ', 1)
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:cut]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    # commit with Mercurial's encoding temporarily forced to UTF-8
    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = saved_encoding

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command.

    A reset directly followed by a 'commit' is handed to parse_commit().
    A reset with a 'from' line records the referenced node for the ref in
    ``parsed_refs``; a reset without one records nothing.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
        return
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        changelog = parser.repo.changelog
        parsed_refs[ref] = changelog.node(mark_to_rev(from_mark))
def parse_tag(parser):
    """Consume a 'tag' command from the stream and discard it.

    The name, mark, tagger and message are all read, in that order, so the
    parser ends up positioned after the command, but none of them is used.
    """
    parser[1]
    parser.next()
    parser.get_mark()
    parser.next()
    parser.get_author()
    parser.next()
    parser.get_data()
    parser.next()

    # nothing to do
def do_export(parser):
    """Answer the 'export' command.

    Parses the incoming stream up to 'done', then reports the result for
    each ref collected in ``parsed_refs``: named branches and tags are
    acknowledged directly, changes are pushed to the remote peer if there is
    one, and bookmarks are updated locally and on the peer.  Each ref gets
    an 'ok <ref>' or 'error <ref>' line; the block ends with an empty line.
    """
    global parsed_refs, bmarks, peer

    p_bmarks = []

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            parse_tag(parser)
        elif parser.check('feature'):
            pass
        else:
            die('unhandled export command: %s' % line)

    for ref, node in parsed_refs.iteritems():
        # The trailing slash matters: without it a bookmark whose name
        # merely starts with "branches" would be treated as a named branch,
        # and the slice below assumes the slash is part of the prefix.
        if ref.startswith('refs/heads/branches/'):
            branch = ref[len('refs/heads/branches/'):]
            if branch in branches and node in branches[branch]:
                # up to date
                continue
            print("ok %s" % ref)
        elif ref.startswith('refs/heads/'):
            # bookmarks are handled after the push below
            bmark = ref[len('refs/heads/'):]
            p_bmarks.append((bmark, node))
            continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            if mode == 'git':
                msg = 'Added tag %s for changeset %s' % (tag, hghex(node[:6]))
                parser.repo.tag([tag], node, msg, False, None, {})
            else:
                parser.repo.tag([tag], node, None, True, None, {})
            print("ok %s" % ref)
        else:
            # transport-helper/fast-export bugs
            continue

    if peer:
        parser.repo.push(peer, force=force_push)

    # handle bookmarks
    for bmark, node in p_bmarks:
        ref = 'refs/heads/' + bmark
        new = hghex(node)

        if bmark in bmarks:
            old = bmarks[bmark].hex()
        else:
            old = ''

        if bmark == 'master' and 'master' not in parser.repo._bookmarks:
            # fake bookmark
            pass
        elif bookmarks.pushbookmark(parser.repo, bmark, old, new):
            # updated locally
            pass
        else:
            print("error %s" % ref)
            continue

        if peer:
            # the remote update needs the value currently held remotely
            rb = peer.listkeys('bookmarks')
            old = rb.get(bmark, '')
            if not peer.pushkey('bookmarks', bmark, old, new):
                print("error %s" % ref)
                continue

        print("ok %s" % ref)

    print("")
def fix_path(alias, repo, orig_url):
    """Rewrite a relative file URL in the git config as an absolute one.

    Nothing happens unless ``orig_url`` parses as a 'file' URL with a
    relative path; in that case ``remote.<alias>.url`` is set to
    'hg::<url joined onto the current working directory>'.  ``repo`` is not
    used.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    is_relative_file = parsed.scheme == 'file' and not os.path.isabs(parsed.path)
    if not is_relative_file:
        return

    base = "%s/" % os.getcwd()
    abs_url = urlparse.urljoin(base, orig_url)
    subprocess.call(['git', 'config', 'remote.%s.url' % alias, "hg::%s" % abs_url])
def main(args):
    """Entry point: set up the global state and serve commands from stdin.

    ``args[1]`` is the remote alias and ``args[2]`` its URL.  Configuration
    is read through get_config(), the repository is opened, the marks file
    is loaded, and each command block read from stdin is dispatched to its
    do_* handler, flushing stdout after every command.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches, force_push, is_tmp

    alias = args[1]
    url = args[2]
    peer = None

    # defaults, possibly overridden by the git configuration below
    hg_git_compat = False
    track_branches = True
    force_push = True

    try:
        hg_git_compat = get_config('remote-hg.hg-git-compat') == 'true\n'
        # compat mode implies no branch tracking
        track_branches = not hg_git_compat
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
        force_push = get_config('remote-hg.force-push') != 'false\n'
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode, bad_mail, bad_name = 'hg', 'none@none', ''
    else:
        mode, bad_mail, bad_name = 'git', 'unknown', 'Unknown'

    # When the alias is just the URL behind a 4-character prefix, this is a
    # temporary remote: use a hash as directory name.
    is_tmp = alias[4:] == url
    if is_tmp:
        alias = util.sha1(alias).hexdigest()

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}
    marks = None

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks = Marks(os.path.join(dirname, 'marks-hg'))

    # tried in this order; the first matching prefix wins
    handlers = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for word, handler in handlers:
            if parser.check(word):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()
def bye():
    """Exit hook: persist the marks, or clean up a temporary remote.

    Does nothing when no marks object exists.  That includes the case where
    main() aborted before it ever assigned the ``marks`` global, which used
    to raise a NameError from inside the exit hook.
    """
    if not globals().get('marks'):
        return
    if is_tmp:
        # temporary remote: drop its whole state directory
        shutil.rmtree(dirname)
    else:
        marks.store()
# Register the exit hook first so it also runs when main() ends the process
# through sys.exit().
atexit.register(bye)
exit_status = main(sys.argv)
sys.exit(exit_status)