remote-hg: update remote bookmarks
[git.git] / contrib / remote-helpers / git-remote-hg
blob46cddc93aeb5e54a05a198b00f299ef0bff31d02
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial import hg, ui, bookmarks, context, util, encoding, node
14 import re
15 import sys
16 import os
17 import json
18 import shutil
19 import subprocess
20 import urllib
23 # If you want to switch to hg-git compatibility mode:
24 # git config --global remote-hg.hg-git-compat true
26 # If you are not in hg-git-compat mode and want to disable the tracking of
27 # named branches:
28 # git config --global remote-hg.track-branches false
30 # git:
31 # Sensible defaults for git.
32 # hg bookmarks are exported as git branches, hg branches are prefixed
33 # with 'branches/', HEAD is a special case.
35 # hg:
36 # Emulate hg-git.
37 # Only hg bookmarks are exported as git branches.
38 # Commits are modified to preserve hg information and allow bidirectionality.
# Patterns used to pick author identities apart.  They are written as raw
# strings so that regex escapes such as \w and \d are not also read as
# (invalid) string escapes; the compiled patterns are unchanged.

# Leading run of characters up to the first angle bracket.
NAME_RE = re.compile(r'^([^<>]+)')
# "name <mail>" where the name part is optional; must match the whole string.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Lenient "name <mail>trailer" form; group 3 is any text after the closing '>'.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# "<word> [name ]<mail> <digits> <signed digits>" identity line.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write ``ERROR: <msg>`` to stderr and exit with status 1.

    ``msg`` is %-formatted with ``args`` only when ``args`` is non-empty.
    Callers in this file often pass an already formatted message; such a
    message may contain a literal '%' (for instance from a file path) and
    must be reported verbatim instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % (msg,))
    sys.exit(1)
def warn(msg, *args):
    """Write ``WARNING: <msg>`` to stderr.

    ``msg`` is %-formatted with ``args`` only when ``args`` is non-empty, so
    an already formatted message containing a literal '%' is emitted verbatim
    rather than raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % (msg,))
def gitmode(flags):
    """Translate a flags string into the mode string used in 'M' lines.

    'l' in ``flags`` wins over 'x'; anything else is a regular file.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format an offset in seconds as a ``+HHMM`` / ``-HHMM`` string.

    The sign of ``tz`` is inverted for the output (the value passed in is
    the negation of the offset that gets printed).  Sign and magnitude are
    handled separately: floor division and modulo on a negative number
    would otherwise turn e.g. 12600 into '-0430' instead of '-0330'.
    """
    offset = -tz
    sign = '-' if offset < 0 else '+'
    hours, remainder = divmod(abs(offset), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Translate a mode string into a one-letter flag ('x', 'l' or '')."""
    if mode == '100755':
        return 'x'
    if mode == '120000':
        return 'l'
    return ''
def hghex(node):
    """Return ``node`` converted by ``hg.node.hex``."""
    to_hex = hg.node.hex
    return to_hex(node)
def get_config(config):
    """Run ``git config --get <config>`` and return its standard output.

    The output is returned as-is, including any trailing newline.
    """
    argv = ['git', 'config', '--get', config]
    child = subprocess.Popen(argv, stdout=subprocess.PIPE)
    captured = child.communicate()
    return captured[0]
class Marks:
    """Bookkeeping of revision <-> mark pairs and per-ref tips.

    The state is kept in a JSON file at ``path`` with the keys 'tips',
    'marks' and 'last-mark'.  ``marks`` maps ``str(rev)`` to a mark number;
    ``rev_marks`` is the in-memory reverse mapping and is not stored.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read the state from ``self.path``; do nothing if it is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings; the reverse map stores integer revs.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the state that gets serialised by store()."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to ``self.path`` as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for ``rev`` (KeyError if unknown)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for ``mark`` (KeyError if unknown)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark number for ``rev`` and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse mapping in step, as new_mark() does, so that
        # to_rev() also works for marks allocated during this run.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen ``mark`` for ``rev``."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Tell whether ``rev`` already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the stored tip for ``branch``, or 0 when none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Remember ``tip`` as the tip for ``branch``."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader over standard input.

    ``self.line`` always holds the current (stripped) line; the helper
    methods inspect or consume it.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace removed."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals ``separator``.

        A new line is only read after the consumer resumes the generator,
        so code in the loop body may advance the parser itself.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a 'data' line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a '+HHMM' / '-HHMM' offset into signed seconds.

        Sign and magnitude are split first: floor division and modulo on a
        negative value would turn '-0330' into -10200 instead of -12600.
        """
        tz = int(tz)
        sign = -1 if tz < 0 else 1
        hours, minutes = divmod(abs(tz), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current line as an identity line.

        Returns ``(user, date, tz)`` where ``tz`` is the negated offset in
        seconds, or None when the line does not match RAW_AUTHOR_RE.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # A trailing " ext:(...)" carries URL-quoted extra text.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder mail is dropped again.
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_to_seconds(tz))
def export_file(fc):
    """Print an inline 'M' entry followed by the data of file context ``fc``."""
    contents = fc.data()
    git_mode = gitmode(fc.flags())
    print("M %s inline %s" % (git_mode, fc.path()))
    print("data %d" % len(contents))
    print(contents)
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of ``ctx`` with that of ``repo[parent]``.

    Returns ``(changed, removed)``: ``changed`` is the set of files that
    are new or differ in flags or content, ``removed`` the set of files
    present only in the parent manifest.
    """
    current = ctx.manifest()
    # Work on a copy: entries are deleted as they are matched, so whatever
    # is left at the end exists only in the parent.
    leftover = repo[parent].manifest().copy()

    changed = set()
    for path in current:
        if path not in leftover:
            changed.add(path)
            continue
        if current.flags(path) != leftover.flags(path) or current[path] != leftover[path]:
            changed.add(path)
        del leftover[path]

    return changed, set(leftover.keys())
def fixup_user_git(user):
    """Split ``user`` into ``(name, mail)`` using the strict patterns.

    Double quotes are removed first.  Either element may be None when it
    cannot be extracted.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    # No "<mail>" part: fall back to whatever precedes an angle bracket.
    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split ``user`` into ``(name, mail)`` using the lenient pattern.

    Text found after the closing '>' is URL-quoted and appended to the
    name as ``ext:(...)``.
    """
    def scrub(text):
        # stole this from hg-git
        trimmed = text.lstrip('< ').rstrip('> ')
        return re.sub('[<>\n]', '?', trimmed)

    parsed = AUTHOR_HG_RE.match(user)
    if not parsed:
        cleaned = scrub(user)
        # Without angle brackets, something containing '@' doubles as mail.
        return (cleaned, cleaned if '@' in user else None)

    raw_name, raw_mail, trailer = parsed.groups()
    name = scrub(raw_name)
    mail = scrub(raw_mail)
    if trailer:
        name += ' ext:(' + urllib.quote(trailer) + ')'

    return (name, mail)
def fixup_user(user):
    """Return ``user`` normalised to the form ``name <mail>``.

    The splitter is chosen by the global ``mode``; missing parts are
    replaced by the global ``bad_name`` / ``bad_mail`` placeholders.
    """
    global mode, bad_mail

    split = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = split(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Open the repository behind ``url`` and return it.

    A local URL is opened directly.  Otherwise a clone under
    ``<dirname>/clone`` is created, or an existing one is opened and
    pulled into; in both of those cases the global ``peer`` is set.
    """
    global dirname, peer

    hg_ui = ui.ui()
    hg_ui.setconfig('ui', 'interactive', 'off')
    hg_ui.fout = sys.stderr

    if hg.islocal(url):
        return hg.repository(hg_ui, url)

    local_path = os.path.join(dirname, 'clone')
    if os.path.exists(local_path):
        repo = hg.repository(hg_ui, local_path)
        peer = hg.peer(hg_ui, {}, url)
        repo.pull(peer, heads=None, force=True)
        return repo

    peer, dstpeer = hg.clone(hg_ui, {}, url, local_path, update=False, pull=True)
    return dstpeer.local()
def rev_to_mark(rev):
    """Look up the mark recorded for ``rev`` in the global ``marks``."""
    global marks
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision recorded for ``mark`` in the global ``marks``."""
    global marks
    rev = marks.to_rev(mark)
    return rev
def _hg_compat_trailer(ctx, extra, rev_branch):
    """Build the metadata lines that follow '--HG--' in 'hg' mode."""
    extra_msg = ''

    if rev_branch != 'default':
        extra_msg += 'branch : %s\n' % rev_branch

    for f in ctx.files():
        if f not in ctx.manifest():
            continue
        rename = ctx.filectx(f).renamed()
        if rename:
            extra_msg += "rename : %s => %s\n" % (rename[0], f)

    for key, value in extra.items():
        # These keys are represented elsewhere or deliberately left out.
        if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
            continue
        extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

    return extra_msg

def export_ref(repo, name, kind, head):
    """Print commit commands for every unmarked revision up to ``head``.

    The ref written is ``<prefix>/<kind>/<name>``.  Revisions from the
    stored tip of that ref up to ``head.rev()`` that have no mark yet are
    emitted; afterwards the ref is reset to ``head`` and the tip is
    updated.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return

    revs = [r for r in xrange(tip, head_rev + 1) if not marks.is_marked(r)]
    count = 0

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # Root revision: everything in the manifest is new.
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        if mode == 'hg':
            extra_msg = _hg_compat_trailer(c, extra, rev_branch)

            desc += '\n'
            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (f))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    #
    # Use the head revision explicitly.  The loop variable is not a reliable
    # stand-in: it is unset when nothing had to be exported, and it is not
    # the head when the head was already marked but earlier revisions were
    # not.
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export the revision ``repo[tag]`` under the 'tags' kind."""
    tagged = repo[tag]
    export_ref(repo, tag, 'tags', tagged)
def export_bookmark(repo, bmark):
    """Export the entry of the global ``bmarks`` named ``bmark``."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export ``branch`` under the 'branches' kind, up to its tip."""
    branch_head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', branch_head)
def export_head(repo):
    """Export the ``(name, node)`` pair stored in the global ``g_head``."""
    global g_head
    head_name, head_node = g_head[0], g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
    """Print the capability list, terminated by an empty line."""
    global prefix, dirname

    announcements = [
        "import",
        "export",
        "refspec refs/heads/branches/*:%s/branches/*" % prefix,
        "refspec refs/heads/*:%s/bookmarks/*" % prefix,
        "refspec refs/tags/*:%s/tags/*" % prefix,
    ]
    for entry in announcements:
        print(entry)

    marks_file = os.path.join(dirname, 'marks-git')

    # Importing marks is only announced once the file exists.
    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print("")
def get_branch_tip(repo, branch):
    """Return the tip to use for ``branch``, or None if it has no heads.

    Heads are taken from the global ``branches``.  With several heads a
    warning is printed and ``repo.branchtip`` is preferred when available.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Pass the name as an argument so that a '%' in the branch name
        # cannot break the formatting done inside warn().
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Print the HEAD line and remember it in the global ``g_head``.

    The current bookmark is used when there is one.  Otherwise a bookmark
    is faked from the branch name ``cur`` ('default' becomes 'master') and
    registered in the global ``bmarks``; nothing is printed when no node
    can be found for it.
    """
    global g_head, bmarks

    current = bookmarks.readcurrent(repo)
    if current:
        head = current
        node = repo[current]
    else:
        # fake bookmark from current branch
        node = repo['.'] or repo['tip']
        if not node:
            return
        head = 'master' if cur == 'default' else cur
        bmarks[head] = node

    print("@refs/heads/%s HEAD" % head)
    g_head = (head, node)
def do_list(parser):
    """Print the list of refs, terminated by an empty line.

    Fills the globals ``bmarks`` and, when ``track_branches`` is set,
    ``branches`` along the way.
    """
    global branches, bmarks, mode, track_branches

    repo = parser.repo
    for bmark_name, bmark_node in bookmarks.listbookmarks(repo).iteritems():
        bmarks[bmark_name] = repo[bmark_node]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        for branch_name in repo.branchmap():
            branch_heads = repo.branchheads(branch_name)
            if len(branch_heads) > 0:
                branches[branch_name] = branch_heads

        for branch_name in branches:
            print("? refs/heads/branches/%s" % branch_name)

    for bmark_name in bmarks:
        print("? refs/heads/%s" % bmark_name)

    for tag_name, _node in repo.tagslist():
        if tag_name != 'tip':
            print("? refs/tags/%s" % tag_name)

    print("")
def do_import(parser):
    """Handle a run of consecutive 'import <ref>' lines.

    Prints the feature lines, exports every requested ref and finishes
    with 'done'.
    """
    repo = parser.repo

    path = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(path):
        print("feature import-marks=%s" % path)
    print("feature export-marks=%s" % path)
    sys.stdout.flush()

    # The encoding override is module-level state: restore it even when an
    # export fails part-way.
    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'
    try:
        # lets get all the import lines
        while parser.check('import'):
            ref = parser[1]

            if (ref == 'HEAD'):
                export_head(repo)
            elif ref.startswith('refs/heads/branches/'):
                branch = ref[len('refs/heads/branches/'):]
                export_branch(repo, branch)
            elif ref.startswith('refs/heads/'):
                bmark = ref[len('refs/heads/'):]
                export_bookmark(repo, bmark)
            elif ref.startswith('refs/tags/'):
                tag = ref[len('refs/tags/'):]
                export_tag(repo, tag)

            parser.next()
    finally:
        encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Read one blob and store its data in ``blob_marks`` under its mark."""
    global blob_marks

    parser.next()                 # move on to the mark line
    blob_id = parser.get_mark()
    parser.next()                 # move on to the data line
    blob_marks[blob_id] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add files touched by ``repo[p1]`` that ``files`` does not list yet.

    Only files still present in that revision's manifest are added, each
    as ``{'ctx': <file context>}``.  ``p2`` is accepted but not used.
    """
    parent_ctx = repo[p1]
    for path in parent_ctx.files():
        if path in files or path not in parent_ctx.manifest():
            continue
        files[path] = { 'ctx' : parent_ctx[path] }
def parse_commit(parser):
    """Read one 'commit' command and commit it to ``parser.repo``.

    The new node is recorded in ``parsed_refs`` under the ref name and its
    revision is associated with the commit's mark.
    """
    global marks, blob_marks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            # mark_ref looks like ':<number>'
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # Pass the line as an argument: it may contain '%'.
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx; looks the file up in ``files``.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        # Everything after the '--HG--' marker is metadata, one
        # "key : value" entry per line.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    # The encoding override is module-level state: restore it even when the
    # commit fails.
    tmp = encoding.encoding
    encoding.encoding = 'utf-8'
    try:
        node = repo.commitctx(ctx)
    finally:
        encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Read one 'reset' command.

    A reset directly followed by a commit is handed to parse_commit(); a
    reset with a 'from' line records the referenced node in
    ``parsed_refs``; anything else is ignored.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        source_mark = parser.get_mark()
        parser.next()

        changelog = parser.repo.changelog
        parsed_refs[ref] = changelog.node(mark_to_rev(source_mark))
def parse_tag(parser):
    """Read one 'tag' command and discard its contents."""
    # Each piece is still parsed so that the input is consumed exactly as
    # before; the values themselves are not needed.
    parser[1]
    parser.next()
    parser.get_mark()
    parser.next()
    parser.get_author()
    parser.next()
    parser.get_data()
    parser.next()

    # nothing to do
def do_export(parser):
    """Handle an 'export' command.

    Reads commands until 'done', applies the resulting refs (tags are
    created, bookmarks are updated locally and on ``peer`` when there is
    one) and prints an 'ok' or 'error' line per ref, then an empty line.
    """
    global parsed_refs, bmarks, peer

    p_bmarks = []

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            parse_tag(parser)
        elif parser.check('feature'):
            pass
        else:
            # Pass the line as an argument: it may contain '%'.
            die('unhandled export command: %s', line)

    for ref, node in parsed_refs.items():
        # The trailing slash matters: without it a bookmark whose name
        # merely starts with "branches" would be taken for a named branch.
        if ref.startswith('refs/heads/branches/'):
            print("ok %s" % ref)
        elif ref.startswith('refs/heads/'):
            # Bookmarks are handled after the push below.
            bmark = ref[len('refs/heads/'):]
            p_bmarks.append((bmark, node))
            continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            parser.repo.tag([tag], node, None, True, None, {})
            print("ok %s" % ref)
        else:
            # transport-helper/fast-export bugs
            continue

    if peer:
        parser.repo.push(peer, force=False)

    # handle bookmarks
    for bmark, node in p_bmarks:
        ref = 'refs/heads/' + bmark
        new = hghex(node)

        if bmark in bmarks:
            old = bmarks[bmark].hex()
        else:
            old = ''

        if bmark == 'master' and 'master' not in parser.repo._bookmarks:
            # fake bookmark
            pass
        elif bookmarks.pushbookmark(parser.repo, bmark, old, new):
            # updated locally
            pass
        else:
            print("error %s" % ref)
            continue

        if peer:
            if not peer.pushkey('bookmarks', bmark, old, new):
                print("error %s" % ref)
                continue

        print("ok %s" % ref)

    print("")
def fix_path(alias, repo, orig_url):
    """Rewrite ``remote.<alias>.url`` when the repository URL differs.

    Nothing is done when ``repo.url()`` and ``orig_url`` have the same
    string form after going through ``util.url``.
    """
    actual = util.url(repo.url())
    requested = util.url(orig_url)
    if str(requested) != str(actual):
        subprocess.call(['git', 'config', 'remote.%s.url' % alias, "hg::%s" % actual])
def main(args):
    """Entry point: ``args`` is ``[program, alias, url]``.

    Sets up the module-level state, opens the repository and serves
    commands read from standard input until an empty line.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    try:
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # When the alias is just the URL behind a four character prefix, no
    # named remote is involved: work in a throw-away directory keyed by a
    # hash of the alias.
    if alias[4:] == url:
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # Pass the line as an argument: it may contain '%'.
            die('unhandled command: %s', line)
        sys.stdout.flush()

    if not is_tmp:
        marks.store()
    else:
        shutil.rmtree(dirname)
# Only run when executed as a script, so that importing the module does not
# start reading commands from standard input.
if __name__ == '__main__':
    sys.exit(main(sys.argv))