remote-hg: fix bad state issue
[git/debian.git] / contrib / remote-helpers / git-remote-hg
blobe3d7f778797247b6f49a710904ca004f6bd90834
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial import hg, ui, bookmarks, context, util, encoding, node, error
14 import re
15 import sys
16 import os
17 import json
18 import shutil
19 import subprocess
20 import urllib
21 import atexit
24 # If you want to switch to hg-git compatibility mode:
25 # git config --global remote-hg.hg-git-compat true
27 # If you are not in hg-git-compat mode and want to disable the tracking of
28 # named branches:
29 # git config --global remote-hg.track-branches false
31 # If you don't want to force pushes (and thus risk creating new remote heads):
32 # git config --global remote-hg.force-push false
34 # If you want the equivalent of hg's clone/pull --insecure option:
35 # git config remote-hg.insecure true
37 # git:
38 # Sensible defaults for git.
39 # hg bookmarks are exported as git branches, hg branches are prefixed
40 # with 'branches/', HEAD is a special case.
42 # hg:
43 # Emulate hg-git.
44 # Only hg bookmarks are exported as git branches.
45 # Commits are modified to preserve hg information and allow bidirectionality.
# Patterns used to split author/committer identities. They are raw strings so
# that regex escapes such as \w and \d are never read as string escapes.

# Leading run of characters that contains no angle bracket.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by '<mail>' and nothing else.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Name, '<mail', and optionally '>' plus trailing text.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# '<keyword> [name ]<mail> <timestamp> <+/-offset>' lines.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <msg>' to stderr and exit with status 1.

    ``msg`` is %-interpolated with ``args`` only when arguments are supplied,
    so an already formatted message that happens to contain '%' is reported
    verbatim instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <msg>' to stderr.

    ``msg`` is %-interpolated with ``args`` only when arguments are supplied,
    so an already formatted message containing '%' is printed verbatim.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gitmode(flags):
    """Return the git file mode string for a set of file flags.

    'l' in ``flags`` wins and yields '120000'; otherwise 'x' yields '100755';
    anything else yields '100644'.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format a timezone offset in seconds as a git '+hhmm' / '-hhmm' string.

    ``tz`` uses the opposite sign convention to git, so it is negated first.
    The sign is handled separately from the magnitude: flooring a negative
    offset would turn e.g. 12600 (three and a half hours) into '-0430'
    instead of '-0330'.
    """
    offset = -tz
    sign = '-' if offset < 0 else '+'
    hours, remainder = divmod(abs(offset), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Return the file flag for a git mode string.

    '100755' maps to 'x', '120000' maps to 'l', and every other mode maps to
    the empty string.
    """
    if mode == '100755':
        return 'x'
    if mode == '120000':
        return 'l'
    return ''
def hghex(node):
    """Return ``node`` converted by ``hg.node.hex``."""
    # The parameter shadows the imported ``node`` module, hence the lookup
    # through ``hg``.
    to_hex = hg.node.hex
    return to_hex(node)
def get_config(config):
    """Run ``git config --get <config>`` and return its raw standard output.

    The output is returned unmodified, including any trailing newline.
    """
    proc = subprocess.Popen(['git', 'config', '--get', config],
                            stdout=subprocess.PIPE)
    captured = proc.communicate()[0]
    return captured
class Marks:
    """Persistent mapping between revisions and fast-import marks.

    The state is kept in a JSON file at ``path`` with three entries:
    'tips' (a per-branch value, see get_tip/set_tip), 'marks' (revision, as a
    string key, to mark) and 'last-mark' (the highest mark handed out).
    ``rev_marks`` is the in-memory reverse mapping from mark to revision.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read the state from ``self.path``; do nothing if it is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on the garbage
        # collector.
        with open(self.path) as fp:
            tmp = json.load(fp)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings; the reverse map stores integer revs.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serialisable state as a plain dictionary."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to ``self.path`` as JSON."""
        # The with-block guarantees the data is flushed and the file closed
        # before the process exits.
        with open(self.path, 'w') as fp:
            json.dump(self.dict(), fp)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for ``rev``; raises KeyError if unknown."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for ``mark``; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for ``rev`` and return it.

        Only the rev-to-mark map is updated here; the reverse map is filled by
        new_mark() and load().
        """
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen ``mark`` for ``rev`` in both maps."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if ``rev`` already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the value stored for ``branch``, or 0 when there is none."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Store ``tip`` as the value for ``branch``."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    ``line`` always holds the current, whitespace-stripped input line.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and return it stripped."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals ``separator``.

        A fresh line is read after each yield, so consumers that advance the
        parser themselves affect which line is seen next.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a '+hhmm' / '-hhmm' offset into signed seconds.

        The sign is applied to the whole offset: floor division and modulo on
        a negative number would turn '-0330' into -4h +70min.
        """
        offset = int(tz)
        sign = -1 if offset < 0 else 1
        hours, minutes = divmod(abs(offset), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse an author/committer/tagger line.

        Returns None when the line does not match RAW_AUTHOR_RE, otherwise a
        (user, timestamp, offset) tuple in which the offset is the negated
        number of seconds of the '+hhmm' / '-hhmm' field.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Split a trailing 'ext:(...)' marker off the name and unquote it.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder address is dropped; only the name is kept.
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_to_seconds(tz))
def export_file(fc):
    """Print an inline 'M' (file modify) command for the file context ``fc``.

    Emits the mode/path header, a 'data <length>' line and the file content.
    """
    payload = fc.data()
    header = "M %s inline %s" % (gitmode(fc.flags()), fc.path())
    print(header)
    print("data %d" % len(payload))
    print(payload)
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of ``ctx`` with that of ``repo[parent]``.

    Returns a pair of sets: the files that are new or differ in flags or
    content, and the files present only in the parent manifest.
    """
    current = ctx.manifest()
    previous = repo[parent].manifest()

    changed = set()
    for path in current:
        if path not in previous:
            # Newly added file.
            changed.add(path)
        elif current.flags(path) != previous.flags(path) or current[path] != previous[path]:
            # Same path, different flags or content.
            changed.add(path)

    dropped = set(path for path in previous if path not in current)
    return changed, dropped
def fixup_user_git(user):
    """Split ``user`` into a (name, mail) pair.

    Double quotes are removed first. If the text matches AUTHOR_RE the name
    group and the stripped mail are returned; otherwise, if NAME_RE matches,
    the stripped name and None; otherwise (None, None).
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split ``user`` into a (name, mail) pair using AUTHOR_HG_RE.

    Text following the closing '>' is URL-quoted and appended to the name as
    ' ext:(...)'. When the pattern does not match, the sanitized input is the
    name, and also the mail if the input contains '@'.
    """
    def sanitize(name):
        # stole this from hg-git
        trimmed = name.lstrip('< ').rstrip('> ')
        return re.sub('[<>\n]', '?', trimmed)

    matched = AUTHOR_HG_RE.match(user)
    if not matched:
        name = sanitize(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = sanitize(matched.group(1))
    mail = sanitize(matched.group(2))
    trailer = matched.group(3)
    if trailer:
        name = name + ' ext:(' + urllib.quote(trailer) + ')'
    return (name, mail)
def fixup_user(user):
    """Return ``user`` normalised to 'name <mail>'.

    The splitting routine is chosen by the global ``mode``; an empty name or
    mail is replaced by the global ``bad_name`` / ``bad_mail``.
    """
    global mode, bad_mail

    splitter = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = splitter(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Return the Mercurial repository to work on for ``url``.

    A local URL is opened directly. Otherwise a clone is kept under
    ``<dirname>/clone``: it is created on first use and pulled from on later
    runs. In both of those cases the global ``peer`` is set to the remote peer.
    Any error while contacting the remote ends the process via die().
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')
    # Keep Mercurial's own output away from stdout.
    myui.fout = sys.stderr

    try:
        if get_config('remote-hg.insecure') == 'true\n':
            myui.setconfig('web', 'cacerts', '')
    except subprocess.CalledProcessError:
        pass

    if hg.islocal(url):
        repo = hg.repository(myui, url)
    else:
        local_path = os.path.join(dirname, 'clone')
        if not os.path.exists(local_path):
            try:
                peer, dstpeer = hg.clone(myui, {}, url, local_path, update=True, pull=True)
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # propagate instead of being reported as a repository error.
                die('Repository error')
            repo = dstpeer.local()
        else:
            repo = hg.repository(myui, local_path)
            try:
                peer = hg.peer(myui, {}, url)
            except Exception:
                die('Repository error')
            repo.pull(peer, heads=None, force=True)

    return repo
def rev_to_mark(rev):
    """Return the mark stored for ``rev`` in the global marks table."""
    global marks
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Return the revision stored for ``mark`` in the global marks table."""
    global marks
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
    """Print a fast-import stream that brings ref ``kind/name`` up to ``head``.

    Every not yet marked revision between the stored tip of the ref and the
    revision of ``head`` is written as a commit. Afterwards the ref is reset
    to the mark of the head revision and that revision is stored as the new
    tip.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return

    revs = [r for r in xrange(tip, head_rev + 1) if not marks.is_marked(r)]
    count = 0

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # Root commit: every file in the manifest is new.
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        if mode == 'hg':
            # Append the extra metadata to the message after a '--HG--' line.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.items():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                else:
                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            desc += '\n'
            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (f))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    # The head revision is used rather than the loop variable: the loop does
    # not run when there is nothing new to export, which would leave ``rev``
    # unset.
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export the changeset ``repo[tag]`` as the ref 'tags/<tag>'."""
    target = repo[tag]
    export_ref(repo, tag, 'tags', target)
def export_bookmark(repo, bmark):
    """Export the head recorded for ``bmark`` as the ref 'bookmarks/<bmark>'."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export the tip of the named branch as the ref 'branches/<branch>'."""
    branch_tip = get_branch_tip(repo, branch)
    export_ref(repo, branch, 'branches', repo[branch_tip])
def export_head(repo):
    """Export the (name, node) pair stored in the global ``g_head`` as a bookmark."""
    global g_head
    head_name, head_node = g_head[0], g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
    """Answer the 'capabilities' command.

    Prints the supported commands, the refspecs under the global ``prefix``
    and the marks-file options, followed by an empty line. The import-marks
    option is only advertised when the marks file already exists.
    """
    global prefix, dirname

    print("import")
    print("export")
    for pattern in ("refspec refs/heads/branches/*:%s/branches/*",
                    "refspec refs/heads/*:%s/bookmarks/*",
                    "refspec refs/tags/*:%s/tags/*"):
        print(pattern % prefix)

    marks_file = os.path.join(dirname, 'marks-git')

    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print("")
def get_branch_tip(repo, branch):
    """Return the head to use for ``branch``, or None if it has no heads.

    With several heads a warning is printed and ``repo.branchtip`` is used
    when available; otherwise the first recorded head is returned.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # The branch name is passed as an argument so that warn() does the
        # interpolation; a pre-formatted message would be interpolated a
        # second time and fail on names containing '%'.
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Print the symbolic HEAD line and remember it in the global ``g_head``.

    The current bookmark is used when there is one. Otherwise a bookmark is
    faked from the branch name ``cur`` ('default' becomes 'master') and
    registered in ``bmarks``; nothing is printed if no node can be found.
    """
    global g_head, bmarks

    head = bookmarks.readcurrent(repo)
    if head:
        node = repo[head]
    else:
        # fake bookmark from current branch
        node = repo['.']
        if not node:
            node = repo['tip']
        if not node:
            return
        head = 'master' if cur == 'default' else cur
        bmarks[head] = node

    print("@refs/heads/%s HEAD" % head)
    g_head = (head, node)
def do_list(parser):
    """Answer the 'list' command.

    Collects the bookmarks into ``bmarks``, prints the HEAD line, and then one
    '? <ref>' line per tracked branch, bookmark and tag (except 'tip'),
    followed by an empty line.
    """
    global branches, bmarks, mode, track_branches

    repo = parser.repo
    for bookmark_name, bookmark_node in bookmarks.listbookmarks(repo).items():
        bmarks[bookmark_name] = repo[bookmark_node]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        for branch_name in repo.branchmap():
            branch_heads = repo.branchheads(branch_name)
            if len(branch_heads):
                branches[branch_name] = branch_heads

        for branch_name in branches:
            print("? refs/heads/branches/%s" % branch_name)

    for bookmark_name in bmarks:
        print("? refs/heads/%s" % bookmark_name)

    for tag_name, _tag_node in repo.tagslist():
        if tag_name != 'tip':
            print("? refs/tags/%s" % tag_name)

    print("")
def do_import(parser):
    """Answer a batch of 'import <ref>' commands.

    Prints the fast-import feature lines, exports every requested ref while
    Mercurial's encoding is temporarily set to utf-8, and ends with 'done'.
    """
    repo = parser.repo

    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    branch_prefix = 'refs/heads/branches/'
    bookmark_prefix = 'refs/heads/'
    tag_prefix = 'refs/tags/'

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        # The branch prefix must be tested before the more general heads one.
        if ref == 'HEAD':
            export_head(repo)
        elif ref.startswith(branch_prefix):
            export_branch(repo, ref[len(branch_prefix):])
        elif ref.startswith(bookmark_prefix):
            export_bookmark(repo, ref[len(bookmark_prefix):])
        elif ref.startswith(tag_prefix):
            export_tag(repo, ref[len(tag_prefix):])

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Consume a blob command and store its data in ``blob_marks`` by mark."""
    global blob_marks

    # Skip the 'blob' line itself; the mark line follows.
    parser.next()
    blob_mark = parser.get_mark()

    parser.next()
    blob_marks[blob_mark] = parser.get_data()

    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add the files touched by ``repo[p1]`` to ``files`` in place.

    Entries already present in ``files`` are kept, and files absent from the
    manifest of ``repo[p1]`` are skipped. Each added entry has the form
    {'ctx': <file context>}. ``p2`` is accepted but not used.
    """
    for path in repo[p1].files():
        if path in files:
            continue
        if path not in repo[p1].manifest():
            continue
        files[path] = { 'ctx' : repo[p1][path] }
def parse_commit(parser):
    """Consume one commit command and create the matching Mercurial commit.

    Reads the mark, author, committer, message, optional 'from'/'merge'
    parents and the file commands, builds an in-memory changeset and commits
    it. The new node is recorded in ``parsed_refs`` under the commit's ref and
    its revision is registered in ``marks`` under the commit's mark.
    """
    global marks, blob_marks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            # mark_ref has the form ':<number>'.
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # Pass the raw line as an argument so that a '%' in it cannot
            # break the message formatting inside die().
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx: returns the content of file f.
        of = files[f]
        if 'deleted' in of:
            # Deleted files are signalled by raising IOError.
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    # A missing parent is represented by twenty NUL bytes.
    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    # Check if the ref is supposed to be a named branch
    if ref.startswith('refs/heads/branches/'):
        extra['branch'] = ref[len('refs/heads/branches/'):]

    if mode == 'hg':
        # Recover the metadata stored after the '--HG--' line of the message
        # and strip that section from the message itself.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    # Commit with Mercurial's encoding temporarily set to utf-8.
    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a reset command.

    A reset directly followed by a commit is handed to parse_commit(). A reset
    with a 'from' line records the referenced node in ``parsed_refs``; any
    other reset is ignored.
    """
    global parsed_refs

    target_ref = parser[1]
    parser.next()

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        source_mark = parser.get_mark()
        parser.next()

        changelog = parser.repo.changelog
        parsed_refs[target_ref] = changelog.node(mark_to_rev(source_mark))
def parse_tag(parser):
    """Consume a tag command; the parsed values are not used."""
    # Each field is still read so that the parser ends up past the tag.
    tag_name = parser[1]
    parser.next()
    target_mark = parser.get_mark()
    parser.next()
    tag_author = parser.get_author()
    parser.next()
    message = parser.get_data()
    parser.next()

    # nothing to do
def do_export(parser):
    """Answer the 'export' command.

    Parses the incoming stream up to 'done', then reports the result per ref:
    named branches are acknowledged directly, tags are created in the
    repository, and bookmarks are updated locally and, when there is a peer,
    on the remote after the push. Each ref gets an 'ok <ref>' or
    'error <ref>' line; the answer ends with an empty line.
    """
    global parsed_refs, bmarks, peer

    p_bmarks = []

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            parse_tag(parser)
        elif parser.check('feature'):
            pass
        else:
            # Pass the raw line as an argument so that a '%' in it cannot
            # break the message formatting inside die().
            die('unhandled export command: %s', line)

    for ref, node in parsed_refs.items():
        if ref.startswith('refs/heads/branches'):
            print("ok %s" % ref)
        elif ref.startswith('refs/heads/'):
            # Bookmarks are handled after the push, see below.
            bmark = ref[len('refs/heads/'):]
            p_bmarks.append((bmark, node))
            continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            if mode == 'git':
                msg = 'Added tag %s for changeset %s' % (tag, hghex(node[:6]))
                parser.repo.tag([tag], node, msg, False, None, {})
            else:
                parser.repo.tag([tag], node, None, True, None, {})
            print("ok %s" % ref)
        else:
            # transport-helper/fast-export bugs
            continue

    if peer:
        parser.repo.push(peer, force=force_push)

    # handle bookmarks
    for bmark, node in p_bmarks:
        ref = 'refs/heads/' + bmark
        new = hghex(node)

        if bmark in bmarks:
            old = bmarks[bmark].hex()
        else:
            old = ''

        if bmark == 'master' and 'master' not in parser.repo._bookmarks:
            # fake bookmark
            pass
        elif bookmarks.pushbookmark(parser.repo, bmark, old, new):
            # updated locally
            pass
        else:
            print("error %s" % ref)
            continue

        if peer:
            if not peer.pushkey('bookmarks', bmark, old, new):
                print("error %s" % ref)
                continue

        print("ok %s" % ref)

    print("")
def fix_path(alias, repo, orig_url):
    """Point git's ``remote.<alias>.url`` at the URL reported by ``repo``.

    Nothing is done when that URL and ``orig_url`` already have the same
    string form.
    """
    reported = util.url(repo.url())
    requested = util.url(orig_url)
    if str(requested) != str(reported):
        subprocess.call(['git', 'config', 'remote.%s.url' % alias, "hg::%s" % reported])
def main(args):
    """Run the remote helper for alias ``args[1]`` and URL ``args[2]``.

    Reads the remote-hg git configuration, initialises the module globals,
    opens the repository and the marks file, and then serves the commands
    arriving on stdin until an empty line is read.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches, force_push, is_tmp

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    force_push = True

    try:
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
        if get_config('remote-hg.force-push') == 'false\n':
            force_push = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # When the alias minus its first four characters equals the URL, the
    # remote is treated as temporary: its state directory is named after a
    # hash of the alias and removed again on exit (see bye()).
    if alias[4:] == url:
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}
    marks = None

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # Pass the raw line as an argument so that a '%' in it cannot
            # break the message formatting inside die().
            die('unhandled command: %s', line)
        sys.stdout.flush()
def bye():
    """Exit hook: store the marks, or remove the state of a temporary remote.

    Does nothing when no marks object exists, which includes the case where
    main() stopped before it assigned the ``marks`` global at all.
    """
    # Looked up through globals() so that a missing name does not raise a
    # NameError from inside the exit hook.
    current_marks = globals().get('marks')
    if not current_marks:
        return
    if not is_tmp:
        current_marks.store()
    else:
        shutil.rmtree(dirname)
# Run the helper only when executed as a script, so that importing this file
# neither registers the exit hook nor terminates the interpreter.
if __name__ == '__main__':
    atexit.register(bye)
    sys.exit(main(sys.argv))