remote-hg: use python urlparse
[git.git] / contrib / remote-helpers / git-remote-hg
blobb6589a3df868a59054c8e86f0713b48cb7127bb3
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 # For remote repositories a local clone is stored in
13 # "$GIT_DIR/hg/origin/clone/.hg/".
15 from mercurial import hg, ui, bookmarks, context, util, encoding, node, error
17 import re
18 import sys
19 import os
20 import json
21 import shutil
22 import subprocess
23 import urllib
24 import atexit
25 import urlparse
28 # If you want to switch to hg-git compatibility mode:
29 # git config --global remote-hg.hg-git-compat true
31 # If you are not in hg-git-compat mode and want to disable the tracking of
32 # named branches:
33 # git config --global remote-hg.track-branches false
35 # If you don't want to force pushes (and thus risk creating new remote heads):
36 # git config --global remote-hg.force-push false
38 # If you want the equivalent of hg's clone/pull --insecure option:
39 # git config remote-hg.insecure true
41 # git:
42 # Sensible defaults for git.
43 # hg bookmarks are exported as git branches, hg branches are prefixed
44 # with 'branches/', HEAD is a special case.
46 # hg:
47 # Emulate hg-git.
48 # Only hg bookmarks are exported as git branches.
49 # Commits are modified to preserve hg information and allow bidirectionality.
# Leading run of characters that are neither '<' nor '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by a '<...>' part that must end the string.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Looser form: name, '<' + mail, and optionally '>' plus trailing text.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# "<word> [<name> ]<<mail>> <digits> <sign><digits>", as matched against the
# author/committer/tagger lines read by Parser.get_author().
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <msg>' to stderr and exit with status 1.

    msg is a %-format string that is only interpolated when args are
    supplied.  Callers that hand over an already formatted message can
    therefore include literal '%' characters (for example text echoed from
    the input stream) without triggering a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <msg>' to stderr.

    msg is a %-format string that is only interpolated when args are
    supplied, so an already formatted message containing a literal '%'
    is printed as-is instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gitmode(flags):
    """Map a flags value to a git file mode string.

    'l' in flags wins and yields '120000'; otherwise 'x' yields '100755';
    anything else yields '100644'.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format an offset in seconds as a '+HHMM' / '-HHMM' string.

    The sign is inverted: a positive tz produces a '-' offset and a
    negative tz a '+' offset (presumably because Mercurial stores seconds
    west of UTC -- confirm against the Mercurial docs).

    Sign and magnitude are handled separately.  Floor division on the
    negated value rounds negative offsets towards minus infinity, which
    turned e.g. 5400 into '-0230' instead of '-0130'.
    """
    sign = '-' if tz > 0 else '+'
    hours, remainder = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Map a git mode string to a flag character.

    '100755' gives 'x', '120000' gives 'l', every other mode gives ''.
    """
    if mode == '100755':
        return 'x'
    if mode == '120000':
        return 'l'
    return ''
def hghex(node):
    """Return hg.node.hex(node).

    The parameter shadows the imported `node` module inside this function,
    which is why the helper is reached through `hg.node`.
    """
    to_hex = hg.node.hex
    return to_hex(node)
def get_config(config):
    """Run `git config --get <config>` and return its raw stdout.

    The output is returned unmodified, so a set value still carries its
    trailing newline (callers compare against e.g. 'true\\n').
    """
    git = subprocess.Popen(['git', 'config', '--get', config],
                           stdout=subprocess.PIPE)
    stdout_data = git.communicate()[0]
    return stdout_data
class Marks:
    """Revision <-> mark bookkeeping persisted as JSON at `path`.

    Attributes:
      tips       maps a ref name to the tip recorded by set_tip()
      marks      maps str(rev) to a mark
      rev_marks  reverse mapping, mark to rev
      last_mark  the most recently handed out / recorded mark
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read state from self.path; a missing file leaves the state empty."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on the
        # garbage collector.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # Revisions are stored as string keys; rebuild the reverse map
        # with integer revisions.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serializable state (rev_marks is derived, not stored)."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state returned by dict() to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for rev (KeyError if there is none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for mark (KeyError if there is none)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for rev and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse mapping in sync, as new_mark() and load() do, so
        # to_rev() also works for marks allocated in this process.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for rev and make it the last mark."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Tell whether rev already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the tip recorded for branch, or 0 if none was recorded."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Record tip for branch."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader over sys.stdin.

    `line` always holds the current (stripped) input line; the first line
    is read when the parser is constructed.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace stripped."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield the current line, then read the next, until it equals separator.

        A new line is read each time the generator is resumed, so whatever
        the consumer left in self.line while handling the yielded line is
        discarded at that point.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating the parser walks lines up to the next empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line; otherwise
        reads exactly <size> characters from stdin and returns them.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a '+HHMM' / '-HHMM' string to a signed number of seconds.

        The sign is applied after splitting hours and minutes.  Floor
        division and modulo on the signed integer gave wrong results for
        negative offsets that are not whole hours ('-0130' became -50
        minutes instead of -90).
        """
        value = int(tz)
        hours, minutes = divmod(abs(value), 100)
        seconds = hours * 3600 + minutes * 60
        if value < 0:
            return -seconds
        return seconds

    def get_author(self):
        """Parse the current line with RAW_AUTHOR_RE.

        Returns None if the line does not match, otherwise a tuple
        (user, date, tz) where date is an int and tz is the negated
        offset in seconds.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Undo the ' ext:(<quoted>)' suffix that fixup_user_hg() appends.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder mail is dropped; only the name is kept.
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_to_seconds(tz))
def fix_file_path(path):
    """Return path unchanged unless it is absolute.

    An absolute path is rewritten relative to '/'.
    """
    if os.path.isabs(path):
        return os.path.relpath(path, '/')
    return path
def export_file(fc):
    """Write an inline 'M' file command for fc to stdout.

    Emits the 'M <mode> inline <path>' line, a 'data <len>' line, and the
    file contents followed by a newline.
    """
    emit = sys.stdout.write

    contents = fc.data()
    name = fix_file_path(fc.path())
    emit("M %s inline %s\n" % (gitmode(fc.flags()), name))
    emit("data %d\n" % len(contents))
    emit(contents)
    emit("\n")
def get_filechanges(repo, ctx, parent):
    """Compare ctx's manifest with that of repo[parent].

    Returns a pair of sets (changed, removed): `changed` holds files that
    are new in ctx or whose flags or manifest entry differ from the
    parent's, `removed` holds files only present in the parent.
    """
    current = ctx.manifest()
    previous = repo[parent].manifest()

    changed = set()
    for fn in current:
        if fn not in previous:
            # added
            changed.add(fn)
        elif (current.flags(fn) != previous.flags(fn)
              or current[fn] != previous[fn]):
            # modified
            changed.add(fn)

    removed = set(fn for fn in previous if fn not in current)

    return changed, removed
def fixup_user_git(user):
    """Split a user string into (name, mail), either of which may be None.

    Double quotes are removed first.  If the string matches AUTHOR_RE both
    parts come from it (mail stripped); otherwise only a stripped name is
    taken from NAME_RE, and mail stays None.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split a user string into (name, mail) for hg-git compatible output.

    When AUTHOR_HG_RE matches, any text after the closing '>' is URL-quoted
    and appended to the name as ' ext:(<quoted>)'.  Otherwise the sanitized
    string is the name, and also the mail if the input contains '@'.
    """
    def scrub(text):
        # stole this from hg-git
        return re.sub('[<>\n]', '?', text.lstrip('< ').rstrip('> '))

    match = AUTHOR_HG_RE.match(user)
    if not match:
        name = scrub(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = scrub(match.group(1))
    mail = scrub(match.group(2))
    trailer = match.group(3)
    if trailer:
        name += ' ext:(' + urllib.quote(trailer) + ')'

    return (name, mail)
def fixup_user(user):
    """Return user as 'Name <mail>'.

    The splitting helper is picked by the global mode; empty or missing
    parts are replaced by the globals bad_name / bad_mail.
    """
    split = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = split(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Return the Mercurial repository object to work on for url.

    A url that hg.islocal() accepts is opened directly.  Otherwise a clone
    under <dirname>/clone is used: created on first use, or opened and
    updated with a forced pull.  In the non-local case the global `peer`
    is set to the remote peer.  Failures to clone or to reach the peer
    end the process through die().
    """
    global peer

    hgui = ui.ui()
    hgui.setconfig('ui', 'interactive', 'off')
    # Send Mercurial's normal output to stderr, not stdout.
    hgui.fout = sys.stderr

    try:
        if get_config('remote-hg.insecure') == 'true\n':
            hgui.setconfig('web', 'cacerts', '')
    except subprocess.CalledProcessError:
        pass

    if hg.islocal(url):
        return hg.repository(hgui, url)

    clone_path = os.path.join(dirname, 'clone')

    if os.path.exists(clone_path):
        # Existing clone: open it and bring it up to date.
        repo = hg.repository(hgui, clone_path)
        try:
            peer = hg.peer(hgui, {}, url)
        except:
            die('Repository error')
        repo.pull(peer, heads=None, force=True)
        return repo

    # First use: create the local clone.
    try:
        peer, dstpeer = hg.clone(hgui, {}, url, clone_path, update=True, pull=True)
    except:
        die('Repository error')
    return dstpeer.local()
def rev_to_mark(rev):
    """Look up the mark recorded for rev in the global marks table."""
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision recorded for mark in the global marks table."""
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
    """Write the commits needed to bring ref <kind>/<name> up to head.

    Every revision numbered from the recorded tip of the ref up to
    head.rev() that has no mark yet is written to stdout as a 'commit'
    command.  Afterwards the ref is reset to head and head.rev() is
    recorded as the new tip.

    The trailing reset and tip update use head.rev() explicitly.  They
    used to rely on `rev` still being bound after the loop, which only
    worked through Python 2's leaking list-comprehension variable: it
    raised NameError when the revision range was empty, and would point
    the ref at the last exported revision rather than at head when those
    differ.
    """
    emit = sys.stdout.write

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return

    revs = [r for r in xrange(tip, head_rev + 1) if not marks.is_marked(r)]
    count = 0

    for rev in revs:

        c = repo[rev]
        (_manifest, user, (time, tz), _files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            # Stored as "<user> <time> <tz>" by parse_commit().
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if not parents:
            # Root commit: every file in the manifest is new.
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        desc += '\n'

        if mode == 'hg':
            # Append the hg-specific metadata block to the message.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.iteritems():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if not parents and rev:
            # A parentless commit other than revision 0 starts a new
            # history, so the ref is reset first.
            emit('reset %s/%s\n' % (prefix, ename))

        emit("commit %s/%s\n" % (prefix, ename))
        emit("mark :%d\n" % (marks.get_mark(rev)))
        emit("author %s\n" % (author))
        emit("committer %s\n" % (committer))
        emit("data %d\n" % (len(desc)))
        emit(desc)
        emit("\n")

        if len(parents) > 0:
            emit("from :%s\n" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                emit("merge :%s\n" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            emit("D %s\n" % (fix_file_path(f)))
        emit("\n")

        count += 1
        if (count % 100 == 0):
            emit("progress revision %d '%s' (%d/%d)\n" % (rev, name, count, len(revs)))
            emit("#############################################################\n")

    # make sure the ref is updated
    emit("reset %s/%s\n" % (prefix, ename))
    emit("from :%u\n" % rev_to_mark(head_rev))
    emit("\n")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export the history leading to repo[tag] under the 'tags' namespace."""
    head = repo[tag]
    export_ref(repo, tag, 'tags', head)
def export_bookmark(repo, bmark):
    """Export bookmark bmark, using the head recorded in the global bmarks."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export named branch `branch` from the tip chosen by get_branch_tip()."""
    head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', head)
def export_head(repo):
    """Export the (name, node) pair stored in the global g_head as a bookmark."""
    head_name, head_node = g_head[0], g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
    """Answer the 'capabilities' command on stdout.

    Lists import/export support, the three refspecs below the global
    prefix, and the marks-git file (import-marks only if it exists).
    The answer ends with an empty line.
    """
    marks_file = os.path.join(dirname, 'marks-git')

    reply = [
        "import",
        "export",
        "refspec refs/heads/branches/*:%s/branches/*" % prefix,
        "refspec refs/heads/*:%s/bookmarks/*" % prefix,
        "refspec refs/tags/*:%s/tags/*" % prefix,
    ]
    if os.path.exists(marks_file):
        reply.append("*import-marks %s" % marks_file)
    reply.append("*export-marks %s" % marks_file)
    # terminating empty line
    reply.append("")

    sys.stdout.write("\n".join(reply) + "\n")
def get_branch_tip(repo, branch):
    """Return the head to use for named branch `branch`, or None.

    The heads come from the global `branches` mapping.  With more than one
    head a warning is printed and, if the repository object offers
    branchtip(), its answer is used; otherwise the first head is returned.
    """
    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if len(heads) > 1:
        # The name is passed as a format argument rather than formatted
        # in advance: warn() formats its message itself, so a '%' in the
        # branch name would otherwise be interpreted a second time.
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Announce what HEAD points to and remember it in the global g_head.

    The current bookmark is used if there is one.  Otherwise a bookmark is
    faked from the branch name `cur` ('default' is renamed to 'master'),
    pointing at repo['.'] or, failing that, repo['tip']; if neither is
    usable nothing is printed.
    """
    global g_head

    head = bookmarks.readcurrent(repo)
    if head:
        ctx = repo[head]
    else:
        # fake bookmark from current branch
        ctx = repo['.']
        if not ctx:
            ctx = repo['tip']
        if not ctx:
            return
        head = 'master' if cur == 'default' else cur
        bmarks[head] = ctx

    sys.stdout.write("@refs/heads/%s HEAD\n" % head)
    g_head = (head, ctx)
def do_list(parser):
    """Answer the 'list' command: print every ref this helper can provide.

    Fills the global bmarks (and, when branch tracking is on, branches)
    as a side effect.  The answer ends with an empty line.
    """
    emit = sys.stdout.write
    repo = parser.repo

    for bmark, hexnode in bookmarks.listbookmarks(repo).iteritems():
        bmarks[bmark] = repo[hexnode]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        for branch in repo.branchmap():
            heads = repo.branchheads(branch)
            if len(heads):
                branches[branch] = heads

        for branch in branches:
            emit("? refs/heads/branches/%s\n" % branch)

    for bmark in bmarks:
        emit("? refs/heads/%s\n" % bmark)

    for tag, _tagnode in repo.tagslist():
        if tag != 'tip':
            emit("? refs/tags/%s\n" % tag)

    emit("\n")
def do_import(parser):
    """Answer a batch of consecutive 'import <ref>' commands.

    Prints the feature lines, exports every requested ref while
    encoding.encoding is temporarily set to 'utf-8', and finishes the
    stream with 'done'.
    """
    emit = sys.stdout.write
    repo = parser.repo

    marks_file = os.path.join(dirname, 'marks-git')

    emit("feature done\n")
    if os.path.exists(marks_file):
        emit("feature import-marks=%s\n" % marks_file)
    emit("feature export-marks=%s\n" % marks_file)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    # Checked in order: the branches namespace lives below refs/heads/
    # and therefore has to be tested first.
    exporters = (
        ('refs/heads/branches/', export_branch),
        ('refs/heads/', export_bookmark),
        ('refs/tags/', export_tag),
    )

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if ref == 'HEAD':
            export_head(repo)
        else:
            for namespace, exporter in exporters:
                if ref.startswith(namespace):
                    exporter(repo, ref[len(namespace):])
                    break

        parser.next()

    encoding.encoding = saved_encoding

    emit('done\n')
def parse_blob(parser):
    """Consume a 'blob' command and keep its data in blob_marks by mark."""
    parser.next()
    mark = parser.get_mark()
    parser.next()
    blob_marks[mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add files touched by repo[p1] that `files` does not mention yet.

    Each such file that is still in p1's manifest is entered into `files`
    (modified in place) as {'ctx': repo[p1][name]}.  p2 is not used.
    """
    for name in repo[p1].files():
        if name in files:
            continue
        if name not in repo[p1].manifest():
            continue
        files[name] = { 'ctx' : repo[p1][name] }
def parse_commit(parser):
    """Consume a 'commit' command and create the matching changeset.

    Reads mark, author, committer, message, optional 'from'/'merge'
    parents and the file commands, commits the result through
    repo.commitctx(), then records the new node in parsed_refs and its
    revision under the commit's mark.
    """
    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    # File commands run until the empty line that ends the commit.
    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # The line is passed as a format argument: it is input text and
            # may contain '%', which must not be interpreted by die().
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx; IOError is raised for deleted files.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    # Check if the ref is supposed to be a named branch
    if ref.startswith('refs/heads/branches/'):
        extra['branch'] = ref[len('refs/heads/branches/'):]

    if mode == 'hg':
        # Undo the '--HG--' metadata block that export_ref() appends.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command.

    A reset directly followed by a commit is handed to parse_commit().
    A reset with a 'from' line records the referenced node for the ref in
    parsed_refs.  Anything else is left untouched.
    """
    ref = parser[1]
    parser.next()

    if parser.check('commit'):
        # ugh
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        parsed_refs[ref] = parser.repo.changelog.node(mark_to_rev(from_mark))
def parse_tag(parser):
    """Consume a 'tag' command; its contents are read and then discarded."""
    # Each field is still parsed so the stream position ends up after the
    # command, exactly as if the values were going to be used.
    parser[1]
    parser.next()
    parser.get_mark()
    parser.next()
    parser.get_author()
    parser.next()
    parser.get_data()
    parser.next()

    # nothing to do
def do_export(parser):
    """Handle the 'export' command.

    Parses the incoming command stream up to 'done', then reports an
    'ok <ref>' / 'error <ref>' line per parsed ref: named branches are
    acknowledged, tags are created in the repository, and bookmarks are
    updated locally and, if there is a peer, on the peer as well.  When a
    peer exists the new changesets are pushed to it before the bookmarks
    are handled.  The answer ends with an empty line.
    """
    emit = sys.stdout.write

    p_bmarks = []

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            parse_tag(parser)
        elif parser.check('feature'):
            pass
        else:
            # The line is passed as a format argument: it is input text and
            # may contain '%', which must not be interpreted by die().
            die('unhandled export command: %s', line)

    for ref, node in parsed_refs.iteritems():
        if ref.startswith('refs/heads/branches'):
            emit("ok %s\n" % ref)
        elif ref.startswith('refs/heads/'):
            # Bookmarks are dealt with after the push below.
            bmark = ref[len('refs/heads/'):]
            p_bmarks.append((bmark, node))
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            if mode == 'git':
                msg = 'Added tag %s for changeset %s' % (tag, hghex(node[:6]))
                parser.repo.tag([tag], node, msg, False, None, {})
            else:
                parser.repo.tag([tag], node, None, True, None, {})
            emit("ok %s\n" % ref)
        else:
            # transport-helper/fast-export bugs
            continue

    if peer:
        parser.repo.push(peer, force=force_push)

    # handle bookmarks
    for bmark, node in p_bmarks:
        ref = 'refs/heads/' + bmark
        new = hghex(node)

        if bmark in bmarks:
            old = bmarks[bmark].hex()
        else:
            old = ''

        if bmark == 'master' and 'master' not in parser.repo._bookmarks:
            # fake bookmark
            pass
        elif bookmarks.pushbookmark(parser.repo, bmark, old, new):
            # updated locally
            pass
        else:
            emit("error %s\n" % ref)
            continue

        if peer:
            rb = peer.listkeys('bookmarks')
            old = rb.get(bmark, '')
            if not peer.pushkey('bookmarks', bmark, old, new):
                emit("error %s\n" % ref)
                continue

        emit("ok %s\n" % ref)

    emit("\n")
def fix_path(alias, repo, orig_url):
    """Rewrite remote.<alias>.url to an absolute location if it is relative.

    Nothing happens unless orig_url parses as a 'file' URL (the default
    scheme) with a non-absolute path.  In that case the URL is joined onto
    the current working directory and stored, prefixed with 'hg::', via
    `git config`.  The repo argument is not used.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    is_relative_file = parsed.scheme == 'file' and not os.path.isabs(parsed.path)
    if not is_relative_file:
        return

    base = "%s/" % os.getcwd()
    abs_url = urlparse.urljoin(base, orig_url)
    subprocess.call(['git', 'config', 'remote.%s.url' % alias, "hg::%s" % abs_url])
def main(args):
    """Entry point: set up global state and serve commands from stdin.

    args[1] is the remote alias and args[2] the repository URL.  After
    reading the remote-hg.* git configuration and opening the repository,
    commands are read line by line until an empty line (or end of input)
    and dispatched to the do_* handlers; stdout is flushed after each one.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches, force_push, is_tmp

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    force_push = True

    try:
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
        if get_config('remote-hg.force-push') == 'false\n':
            force_push = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # When the alias is just the URL with a four character prefix, the
    # state directory is named after a hash and treated as temporary
    # (see bye()).
    if alias[4:] == url:
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}
    marks = None

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # The line is passed as a format argument: it is input text and
            # may contain '%', which must not be interpreted by die().
            die('unhandled command: %s', line)
        sys.stdout.flush()
def bye():
    """Exit hook: persist the marks, or remove temporary state.

    Does nothing while no marks object exists.  Otherwise the marks are
    stored, unless the run was flagged temporary, in which case the whole
    state directory is deleted instead.
    """
    # main() may fail before it assigns the `marks` global at all (for
    # instance on a missing argument); look it up defensively so the exit
    # hook does not raise NameError on top of the original failure.
    current_marks = globals().get('marks')
    if not current_marks:
        return
    if not is_tmp:
        current_marks.store()
    else:
        shutil.rmtree(dirname)
# Run bye() on the way out, however the process ends up exiting.
atexit.register(bye)

exit_status = main(sys.argv)
sys.exit(exit_status)