prompt: fix show upstream with svn and zsh
[git.git] / contrib / remote-helpers / git-remote-hg
blob45f6c80d45ab9848d1197e34a00b8ac5977a05fc
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial import hg, ui, bookmarks, context, util, encoding
14 import re
15 import sys
16 import os
17 import json
18 import shutil
19 import subprocess
20 import urllib
23 # If you want to switch to hg-git compatibility mode:
24 # git config --global remote-hg.hg-git-compat true
26 # git:
27 # Sensible defaults for git.
28 # hg bookmarks are exported as git branches, hg branches are prefixed
29 # with 'branches/', HEAD is a special case.
31 # hg:
32 # Emulate hg-git.
33 # Only hg bookmarks are exported as git branches.
34 # Commits are modified to preserve hg information and allow bidirectionality.
# Patterns used to parse user identities.  They are written as raw strings so
# that regex escapes such as \w and \d are not read as (invalid) string
# escapes; the compiled patterns are unchanged.

# Leading run of characters that contains no angle brackets (a bare name).
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by '<mail>' with nothing after the closing bracket.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Lenient form: name, '<mail', an optional '>' and optional trailing text.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# Identity line: keyword, optional name, <mail>, timestamp and +/-HHMM offset.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an 'ERROR: ...' line to stderr and exit with status 1.

    msg is %-formatted with args only when args are given.  A message that
    was already formatted by the caller (and may therefore contain a literal
    '%', e.g. from a file name) is written as-is instead of raising a
    formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write a 'WARNING: ...' line to stderr.

    msg is %-formatted with args only when args are given, so a message the
    caller already formatted (which may contain a literal '%') is written
    as-is instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gitmode(flags):
    """Return the git file mode string for a set of hg file flags.

    'l' (symlink) wins over 'x' (executable); anything else is a plain file.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format a timezone offset in seconds as a git '+HHMM' / '-HHMM' string.

    The sign is inverted: a positive tz yields a '-' offset and a negative tz
    a '+' offset.  The magnitude is split into hours and minutes before the
    sign is applied, so offsets that are not a whole number of hours are
    rendered correctly in both directions (flooring a negative value would
    turn 3h30m into 4h30m).
    """
    sign = '-' if tz > 0 else '+'
    hours, remainder = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Return the hg flag for a git file mode string.

    '100755' maps to 'x', '120000' maps to 'l'; every other mode maps to ''.
    """
    if mode == '100755':
        return 'x'
    if mode == '120000':
        return 'l'
    return ''
def get_config(config):
    """Return the raw stdout of `git config --get <config>`.

    The output is returned exactly as git printed it (callers compare against
    values ending in a newline).  The exit status is not checked.
    """
    proc = subprocess.Popen(['git', 'config', '--get', config],
                            stdout=subprocess.PIPE)
    return proc.communicate()[0]
class Marks:
    """Revision <-> mark bookkeeping persisted as JSON at `path`.

    Attributes:
        tips:      ref name -> last revision recorded for that ref.
        marks:     str(revision) -> mark.
        rev_marks: mark -> revision (reverse of `marks`).
        last_mark: highest mark handed out or recorded so far.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Load state from self.path; a missing file leaves the state empty."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings, so revisions are converted back to int.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serializable state (rev_marks is rebuilt on load)."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for `rev` (KeyError if there is none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for `mark` (KeyError if there is none)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for `rev`, record it and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse map in sync so to_rev() works for marks allocated
        # in this session, not only for marks reloaded from disk.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for `rev`."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if `rev` already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the revision stored for `branch`, or 0 if none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Store `tip` as the revision for `branch`."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    `line` always holds the current (whitespace-stripped) line; `repo` is
    carried along for the command handlers.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and strip surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals `separator`.

        A new line is read only after the consumer resumes the generator, so
        handlers may advance the parser themselves while processing a line.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line sets `line` to None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer after the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload of a 'data <size>' line, or return None."""
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a '+HHMM' / '-HHMM' string to a signed offset in seconds.

        The sign is applied after splitting the magnitude into hours and
        minutes; floor division on a negative value would turn '-0330'
        into -2h50m.
        """
        value = int(tz)
        hours, minutes = divmod(abs(value), 100)
        seconds = hours * 3600 + minutes * 60
        return -seconds if value < 0 else seconds

    def get_author(self):
        """Parse the current identity line.

        Returns (user, timestamp, tz) where tz is the negated offset in
        seconds, or None if the line does not match RAW_AUTHOR_RE.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Undo the ' ext:(...)' suffix that carries quoted trailing text.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder mail means there was no real address.
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_to_seconds(tz))
def export_file(fc):
    """Print an inline 'M' command for the file context `fc`.

    Emits the mode/path header, a 'data <length>' line and the contents.
    """
    contents = fc.data()
    header = "M %s inline %s" % (gitmode(fc.flags()), fc.path())
    print(header)
    print("data %d" % len(contents))
    print(contents)
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of `ctx` with that of revision `parent`.

    Returns (changed, removed): `changed` is the set of files that are new
    or whose flags or manifest entry differ; `removed` is the set of files
    present only in the parent manifest.
    """
    added, modified = set(), set()

    current = ctx.manifest()
    # Work on a copy: entries are deleted as they are matched.
    previous = repo[parent].manifest().copy()

    for path in current:
        if path not in previous:
            added.add(path)
            continue
        if (current.flags(path) != previous.flags(path)
                or current[path] != previous[path]):
            modified.add(path)
        del previous[path]

    # Whatever was never matched exists only in the parent.
    removed = set(previous.keys())

    return added | modified, removed
def fixup_user_git(user):
    """Split `user` into (name, mail) using the strict patterns.

    Double quotes are removed first.  If the 'name <mail>' pattern does not
    match, only a name is extracted; parts that cannot be found are None.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split `user` into (name, mail) using the lenient pattern.

    Text following the closing '>' is preserved by appending it, URL-quoted,
    to the name as ' ext:(...)'.  Without a match the whole string is the
    name, and also the mail when it contains '@'.
    """
    def sanitize(text):
        # stole this from hg-git
        trimmed = text.lstrip('< ').rstrip('> ')
        return re.sub('[<>\n]', '?', trimmed)

    match = AUTHOR_HG_RE.match(user)
    if not match:
        name = sanitize(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = sanitize(match.group(1))
    mail = sanitize(match.group(2))
    trailer = match.group(3)
    if trailer:
        name += ' ext:(' + urllib.quote(trailer) + ')'

    return (name, mail)
def fixup_user(user):
    """Return `user` normalized to 'name <mail>'.

    The splitting routine is chosen by the global `mode`; missing parts are
    replaced by the global placeholders `bad_name` and `bad_mail`.
    """
    global mode, bad_mail

    splitter = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = splitter(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Return a local repository object for `url`.

    A local url is opened directly.  Otherwise a clone is kept under
    `dirname`/clone: it is created on first use and pulled into on later
    runs.  In the non-local case the global `peer` is set to the remote peer.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')

    if hg.islocal(url):
        return hg.repository(myui, url)

    clone_path = os.path.join(dirname, 'clone')
    if os.path.exists(clone_path):
        # Existing clone: open it and bring it up to date.
        repo = hg.repository(myui, clone_path)
        peer = hg.peer(myui, {}, url)
        repo.pull(peer, heads=None, force=True)
    else:
        # First contact: clone without updating a working directory.
        peer, dstpeer = hg.clone(myui, {}, url, clone_path, update=False, pull=True)
        repo = dstpeer.local()

    return repo
def rev_to_mark(rev):
    """Return the mark recorded for `rev` in the global `marks` store."""
    store = marks
    return store.from_rev(rev)
def mark_to_rev(mark):
    """Return the revision recorded for `mark` in the global `marks` store."""
    store = marks
    return store.to_rev(mark)
def export_ref(repo, name, kind, head):
    """Print the commits needed to bring ref `kind`/`name` up to `head`.

    Every revision from the stored tip up to head.rev() that has no mark yet
    is written as a 'commit' command; then the ref is reset to the head's
    mark and head.rev() is stored as the new tip.

    The final reset and the stored tip are taken from head.rev() rather than
    from the loop variable: the last revision exported is not necessarily
    the head (it may already have been marked through another ref), and the
    loop variable is undefined when the range is empty.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return
    count = 0

    revs = [rev for rev in xrange(tip, head_rev + 1) if not marks.is_marked(rev)]

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            # Stored as 'user time tz'; the user part may contain spaces.
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # Root commit: every file in the manifest is new.
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        desc += '\n'

        if mode == 'hg':
            # Append the metadata trailer after a '--HG--' separator.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.iteritems():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                else:
                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            # A parentless commit other than revision 0 starts a new history.
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (f))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export the revision that `tag` resolves to under the 'tags' kind."""
    target = repo[tag]
    export_ref(repo, tag, 'tags', target)
def export_bookmark(repo, bmark):
    """Export the head recorded for `bmark` in the global `bmarks` map."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export the tip of `branch` under the 'branches' kind."""
    branch_head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', branch_head)
def export_head(repo):
    """Export the (name, node) pair stored in the global `g_head`."""
    global g_head
    head_name = g_head[0]
    head_node = g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
    """Print the capability list, terminated by an empty line.

    '*import-marks' is announced only when the marks file already exists;
    '*export-marks' is always announced.
    """
    global prefix, dirname

    announcements = (
        "import",
        "export",
        "refspec refs/heads/branches/*:%s/branches/*" % prefix,
        "refspec refs/heads/*:%s/bookmarks/*" % prefix,
        "refspec refs/tags/*:%s/tags/*" % prefix,
    )
    for entry in announcements:
        print(entry)

    marks_file = os.path.join(dirname, 'marks-git')

    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print("")
def get_branch_tip(repo, branch):
    """Return the head to use for `branch`, or None if it has no heads.

    Heads come from the global `branches` map.  With several heads a warning
    is printed and repo.branchtip() is used when available; otherwise the
    first head is returned.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Pass the name as a format argument: a branch name containing '%'
        # would break a pre-formatted message when warn() formats it again.
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Print the HEAD symref line and remember the head in `g_head`.

    The current bookmark is used when there is one.  Otherwise a bookmark is
    faked from the branch name `cur` ('default' becomes 'master'), pointing
    at '.' or, failing that, 'tip'; if neither resolves nothing is printed.
    """
    global g_head, bmarks

    current = bookmarks.readcurrent(repo)
    if current:
        head = current
        node = repo[current]
    else:
        # fake bookmark from current branch
        node = repo['.'] or repo['tip']
        if not node:
            return
        head = 'master' if cur == 'default' else cur
        bmarks[head] = node

    print("@refs/heads/%s HEAD" % head)
    g_head = (head, node)
def do_list(parser):
    """Print the ref list: HEAD, branches, bookmarks and tags.

    Fills the global `bmarks` (and `branches` when branch tracking is on).
    The 'tip' tag is skipped; the list ends with an empty line.
    """
    global branches, bmarks, mode, track_branches

    repo = parser.repo
    for bookmark, node in bookmarks.listbookmarks(repo).iteritems():
        bmarks[bookmark] = repo[node]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        for branch in repo.branchmap():
            branch_heads = repo.branchheads(branch)
            if len(branch_heads):
                branches[branch] = branch_heads

        for branch in branches:
            print("? refs/heads/branches/%s" % branch)

    for bookmark in bmarks:
        print("? refs/heads/%s" % bookmark)

    for tag, node in repo.tagslist():
        if tag != 'tip':
            print("? refs/tags/%s" % tag)

    print("")
def do_import(parser):
    """Handle consecutive 'import <ref>' lines.

    Prints the feature header, exports each requested ref (HEAD, branch,
    bookmark or tag) and finishes with 'done'.  The mercurial encoding is
    switched to utf-8 for the duration of the export.
    """
    repo = parser.repo

    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    # Checked in order: the branches namespace is tested before the plain
    # heads namespace that contains it.
    exporters = (
        ('refs/heads/branches/', export_branch),
        ('refs/heads/', export_bookmark),
        ('refs/tags/', export_tag),
    )

    # let's get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if ref == 'HEAD':
            export_head(repo)
        else:
            for ref_prefix, exporter in exporters:
                if ref.startswith(ref_prefix):
                    exporter(repo, ref[len(ref_prefix):])
                    break

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Read a 'blob' command and store its data in `blob_marks` by mark."""
    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add to `files` the files touched by `p1` that are not listed yet.

    Each added entry is {'ctx': <file context from p1>}; files that are not
    in p1's manifest are skipped.  `p2` is not used.
    """
    for path in repo[p1].files():
        if path in files:
            continue
        if path not in repo[p1].manifest():
            continue
        files[path] = { 'ctx' : repo[p1][path] }
def parse_commit(parser):
    """Read one 'commit' command and create the matching changeset.

    Parses mark, author, committer, message, optional 'from'/'merge' parents
    and the 'M'/'D' file commands, commits an in-memory changeset, then
    records the new node in `parsed_refs` and the new revision in `marks`.

    Fixes over the straightforward parse:
      * 'D <path>' is split once, so paths containing spaces are accepted;
      * '--HG--' trailer lines are split on the first ' : ' only, so
        'extra : key : value' lines can be unpacked;
      * the offending line is handed to die() as a format argument.
    """
    global marks, blob_marks, bmarks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            # 'M <mode> :<mark> <path>'; the path is everything that remains.
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # 'D <path>'; split once so spaces in the path are preserved.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx; raising IOError reports a deleted file.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        # Recover the metadata stored after the '--HG--' separator.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node

    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Read a 'reset' command.

    A reset directly followed by a commit is delegated to parse_commit.
    A reset with a 'from' line records the referenced node in `parsed_refs`;
    any other reset is ignored.
    """
    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        parsed_refs[ref] = parser.repo.changelog.node(mark_to_rev(from_mark))
def parse_tag(parser):
    """Consume a 'tag' command from the stream and discard its contents."""
    parser[1]  # tag name; the lookup also checks the line has two words
    parser.next()
    parser.get_mark()  # 'from' mark
    parser.next()
    parser.get_author()  # tagger
    parser.next()
    parser.get_data()  # message; must be read to stay in sync with the stream
    parser.next()

    # nothing to do
def do_export(parser):
    """Handle an 'export' command.

    Dispatches each command up to 'done' to its parser, then applies the
    collected refs: bookmarks are pushed and tags are created, while refs
    under refs/heads/branches are only acknowledged.  Prints 'ok <ref>' for
    each accepted ref, then an empty line, and finally pushes to the remote
    peer if there is one.

    The unknown line is handed to die() as a format argument, so a stray '%'
    in the input cannot break the error message.
    """
    global parsed_refs, bmarks, peer

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            parse_tag(parser)
        elif parser.check('feature'):
            pass
        else:
            die('unhandled export command: %s', line)

    for ref, node in parsed_refs.iteritems():
        if ref.startswith('refs/heads/branches'):
            pass
        elif ref.startswith('refs/heads/'):
            bmark = ref[len('refs/heads/'):]
            if bmark in bmarks:
                old = bmarks[bmark].hex()
            else:
                old = ''
            # A rejected bookmark update is skipped without an 'ok' line.
            if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
                continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            parser.repo.tag([tag], node, None, True, None, {})
        else:
            # transport-helper/fast-export bugs
            continue
        print("ok %s" % ref)

    print("")

    if peer:
        parser.repo.push(peer, force=False)
def fix_path(alias, repo, orig_url):
    """Rewrite remote.<alias>.url when it differs from the repository's url.

    The urls are compared in their normalized string form; on a mismatch
    `git config` is run to store 'hg::<repo url>'.
    """
    repo_url = util.url(repo.url())
    url = util.url(orig_url)
    if str(url) != str(repo_url):
        subprocess.call(['git', 'config', 'remote.%s.url' % alias, "hg::%s" % repo_url])
def main(args):
    """Entry point: serve commands read from stdin.

    args[1] is the remote alias and args[2] the url.  Sets up the module
    globals, opens the repository, then dispatches 'capabilities', 'list',
    'import' and 'export' until an empty line is read.  Marks are stored at
    the end, unless the remote is a temporary one, in which case its
    directory is removed.

    The unknown line is handed to die() as a format argument, so a stray '%'
    in the input cannot break the error message.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    try:
        # get_config returns raw output, hence the trailing newlines.
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # An alias of the form '<4 chars>' + url marks a remote without a
    # configured name; its state is kept under a hash and removed at the end.
    if alias[4:] == url:
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            die('unhandled command: %s', line)
        sys.stdout.flush()

    if not is_tmp:
        marks.store()
    else:
        shutil.rmtree(dirname)

sys.exit(main(sys.argv))