remote-hg: fix bad file paths
[git.git] / contrib / remote-helpers / git-remote-hg
blob a5f0013c627969c1741001c3b25bc450ad073a51
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 # For remote repositories a local clone is stored in
13 # "$GIT_DIR/hg/origin/clone/.hg/".
15 from mercurial import hg, ui, bookmarks, context, util, encoding, node, error
17 import re
18 import sys
19 import os
20 import json
21 import shutil
22 import subprocess
23 import urllib
24 import atexit
27 # If you want to switch to hg-git compatibility mode:
28 # git config --global remote-hg.hg-git-compat true
30 # If you are not in hg-git-compat mode and want to disable the tracking of
31 # named branches:
32 # git config --global remote-hg.track-branches false
34 # If you don't want to force pushes (and thus risk creating new remote heads):
35 # git config --global remote-hg.force-push false
37 # If you want the equivalent of hg's clone/pull --insecure option:
38 # git config remote-hg.insecure true
40 # git:
41 # Sensible defaults for git.
42 # hg bookmarks are exported as git branches, hg branches are prefixed
43 # with 'branches/', HEAD is a special case.
45 # hg:
46 # Emulate hg-git.
47 # Only hg bookmarks are exported as git branches.
48 # Commits are modified to preserve hg information and allow bidirectionality.
# Patterns for splitting author/committer identities.

# Leading run of characters that are neither '<' nor '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by '<mail>' up to the end of the string.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Name, '<mail', and optionally '>' plus whatever trails it.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# '<word> [name ]<mail> <digits> <+/-digits>' as found in a fast-export stream.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an 'ERROR: ...' line to stderr and exit with status 1.

    msg is %-formatted with args only when arguments were supplied, so a
    message that was already formatted by the caller and happens to contain
    a literal '%' (for instance text read from stdin) is reported as-is
    instead of raising a formatting error.
    """
    text = msg % args if args else msg
    sys.stderr.write('ERROR: %s\n' % text)
    sys.exit(1)
def warn(msg, *args):
    """Write a 'WARNING: ...' line to stderr.

    msg is %-formatted with args only when arguments were supplied, so an
    already formatted message containing a literal '%' is printed as-is
    instead of raising a formatting error.
    """
    text = msg % args if args else msg
    sys.stderr.write('WARNING: %s\n' % text)
def gitmode(flags):
    """Translate hg file flags into a git file mode string.

    'l' (checked first) maps to '120000', 'x' to '100755', and anything
    else to '100644'.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format an offset in seconds as a '+HHMM' / '-HHMM' string.

    The sign of tz is inverted before formatting (a negative tz yields a
    '+' offset). Sign and magnitude are handled separately: applying floor
    division and modulo directly to a negative value rounds the hour away
    from zero, which turned e.g. -0330 into -0430.
    """
    offset = -int(tz)
    sign = '-' if offset < 0 else '+'
    minutes = abs(offset) // 60
    return '%s%02d%02d' % (sign, minutes // 60, minutes % 60)
def hgmode(mode):
    """Translate a git file mode string into hg file flags.

    '100755' maps to 'x', '120000' to 'l'; every other mode maps to ''.
    """
    if mode == '100755':
        return 'x'
    if mode == '120000':
        return 'l'
    return ''
def hghex(node):
    """Return node converted by mercurial's hg.node.hex()."""
    to_hex = hg.node.hex
    return to_hex(node)
def get_config(config):
    """Return whatever 'git config --get <config>' writes to stdout.

    The output is returned unmodified, including any trailing newline; the
    command's exit status is not inspected.
    """
    proc = subprocess.Popen(['git', 'config', '--get', config],
                            stdout=subprocess.PIPE)
    return proc.communicate()[0]
class Marks:
    """Mapping between revisions and marks, persisted as JSON.

    Attributes:
        path      -- file the state is loaded from and stored to
        tips      -- branch name -> tip value, as set by set_tip()
        marks     -- str(rev) -> mark
        rev_marks -- mark -> rev (reverse of marks)
        last_mark -- highest mark handed out or recorded so far
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read the state from self.path; do nothing if the file is absent."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings; the reverse map stores integer revs.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serializable state (rev_marks is rebuilt on load)."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        # The with block guarantees the data is flushed and the file closed.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for rev (KeyError if there is none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the rev recorded for mark (KeyError if there is none)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for rev and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse map in step, exactly as new_mark() does, so that
        # to_rev() works for marks allocated during this run.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for rev."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if rev already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the tip stored for branch, or 0 when none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Store tip as the tip of branch."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the commands arriving on stdin.

    self.line always holds the current (stripped) input line; the helper
    methods inspect or advance it.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace removed."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, reading a new one after each, until separator."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # A block ends at the first empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_seconds(tz):
        """Convert a '+HHMM' / '-HHMM' offset string into signed seconds.

        Sign and magnitude are split first: floor division and modulo on a
        negative number mis-handle offsets that are not whole hours
        ('-0330' used to come out as -10200 instead of -12600).
        """
        value = int(tz)
        sign = -1 if value < 0 else 1
        hours, minutes = divmod(abs(value), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current line as an author/committer/tagger line.

        Returns (user, date, tz) where tz is the negated offset in seconds,
        or None when the line does not match RAW_AUTHOR_RE.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Recover the quoted trailing text stored as ' ext:(...)'.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder mail is dropped again; only the name remains.
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_seconds(tz))
def fix_file_path(path):
    """Return path unchanged if relative, else rewritten relative to '/'."""
    if os.path.isabs(path):
        return os.path.relpath(path, '/')
    return path
def export_file(fc):
    """Print an inline 'M' (file modify) command for the file context fc."""
    contents = fc.data()
    target = fix_file_path(fc.path())
    filemode = gitmode(fc.flags())
    print("M %s inline %s" % (filemode, target))
    print("data %d" % len(contents))
    print(contents)
def get_filechanges(repo, ctx, parent):
    """Compare ctx's manifest with the manifest of repo[parent].

    Returns (changed, removed): changed holds the files that are new or
    whose flags or manifest entry differ, removed holds the files present
    only in the parent.
    """
    current = ctx.manifest()
    # Work on a copy: matched entries are deleted, what is left was removed.
    leftover = repo[parent].manifest().copy()

    added = set()
    modified = set()
    for fname in current:
        if fname not in leftover:
            added.add(fname)
            continue
        differs = (current.flags(fname) != leftover.flags(fname)
                   or current[fname] != leftover[fname])
        if differs:
            modified.add(fname)
        del leftover[fname]

    removed = set(leftover.keys())
    return added | modified, removed
def fixup_user_git(user):
    """Split user into (name, mail) after dropping double quotes.

    Either element is None when it cannot be extracted.
    """
    user = user.replace('"', '')

    full = AUTHOR_RE.match(user)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(user)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split user into (name, mail) the way hg-git does.

    Text after the closing '>' is appended to the name, URL-quoted, as
    ' ext:(...)'.
    """
    def scrub(text):
        # stole this from hg-git
        return re.sub('[<>\n]', '?', text.lstrip('< ').rstrip('> '))

    parsed = AUTHOR_HG_RE.match(user)
    if not parsed:
        name = scrub(user)
        # Without '<...>' the whole string is the mail if it has an '@'.
        mail = name if '@' in user else None
        return (name, mail)

    name = scrub(parsed.group(1))
    mail = scrub(parsed.group(2))
    trailing = parsed.group(3)
    if trailing:
        name += ' ext:(' + urllib.quote(trailing) + ')'

    return (name, mail)
def fixup_user(user):
    """Return user as 'name <mail>', filling gaps with bad_name/bad_mail."""
    global mode, bad_mail

    splitter = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = splitter(user)

    # Missing or empty parts fall back to the configured placeholders.
    name = name or bad_name
    mail = mail or bad_mail

    return '%s <%s>' % (name, mail)
def get_repo(url, alias):
    # Return the hg repository object to work on for url.
    #
    # A local url is opened directly. Otherwise a clone stored under
    # "<dirname>/clone" is used: created on first use, pulled into on later
    # runs. In that case the global 'peer' is set to the remote peer.
    # The alias parameter is not used in this function.
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')
    # Route mercurial's output to stderr; stdout carries the helper's replies.
    myui.fout = sys.stderr

    try:
        # get_config() returns the raw output, hence the trailing newline.
        if get_config('remote-hg.insecure') == 'true\n':
            myui.setconfig('web', 'cacerts', '')
    except subprocess.CalledProcessError:
        pass

    if hg.islocal(url):
        repo = hg.repository(myui, url)
    else:
        local_path = os.path.join(dirname, 'clone')
        if not os.path.exists(local_path):
            # First contact: clone the remote into the local path.
            try:
                peer, dstpeer = hg.clone(myui, {}, url, local_path, update=True, pull=True)
            # NOTE(review): bare except also turns KeyboardInterrupt and
            # SystemExit into 'Repository error' - consider narrowing.
            except:
                die('Repository error')
            repo = dstpeer.local()
        else:
            # Clone already exists: open it and pull from the remote peer.
            repo = hg.repository(myui, local_path)
            try:
                peer = hg.peer(myui, {}, url)
            except:
                die('Repository error')
            repo.pull(peer, heads=None, force=True)

    return repo
def rev_to_mark(rev):
    """Return the mark the global marks object has recorded for rev."""
    global marks
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Return the rev the global marks object has recorded for mark."""
    global marks
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
    """Print a fast-import stream for the unexported revisions of a ref.

    repo -- repository the revisions are read from
    name -- ref name without its kind prefix
    kind -- 'bookmarks', 'branches' or 'tags'
    head -- changeset context the ref points at

    Every revision from the stored tip up to head.rev() that has no mark
    yet is emitted as a commit; finally the ref is reset to the head and
    the head revision is stored as the new tip.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return
    revs = xrange(tip, head_rev + 1)
    count = 0

    revs = [rev for rev in revs if not marks.is_marked(rev)]

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            # Stored as '<user> <time> <tz>'; split from the right because
            # the user part may itself contain spaces.
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # Root commit: every file in the manifest is new.
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        if mode == 'hg':
            # hg-git compatibility: append hg-only metadata to the message.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.iteritems():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                else:
                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            desc += '\n'
            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            # A parentless commit other than rev 0 starts a new history.
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (fix_file_path(f)))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    # Point the ref at the head itself rather than at the loop variable:
    # the loop may not have run at all (leaving 'rev' undefined), or its
    # last revision may be an unmarked one that precedes an already marked
    # head.
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export the history leading to the changeset that tag names."""
    target = repo[tag]
    export_ref(repo, tag, 'tags', target)
def export_bookmark(repo, bmark):
    """Export the history leading to the bookmark recorded in bmarks."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export the history leading to the tip of the named branch."""
    branch_tip = get_branch_tip(repo, branch)
    export_ref(repo, branch, 'branches', repo[branch_tip])
def export_head(repo):
    """Export the ref stored in g_head (a (name, context) pair) as a bookmark."""
    global g_head
    head_name = g_head[0]
    head_ctx = g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_ctx)
def do_capabilities(parser):
    """Answer the 'capabilities' command on stdout.

    Announces import/export, the refspecs under the helper's prefix, and
    the git marks file (import-marks only if the file already exists).
    """
    global prefix, dirname

    print("import")
    print("export")
    print("refspec refs/heads/branches/*:%s/branches/*" % prefix)
    print("refspec refs/heads/*:%s/bookmarks/*" % prefix)
    print("refspec refs/tags/*:%s/tags/*" % prefix)

    marks_file = os.path.join(dirname, 'marks-git')

    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    # An empty line terminates the answer.
    print("")
def get_branch_tip(repo, branch):
    """Return the head to use as tip of branch, or None if it has no heads.

    With several heads a warning is printed and, when the repository
    object offers branchtip(), its answer is used; otherwise the first
    recorded head is returned.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Pass the branch name as a format argument: a name containing '%'
        # must not be interpreted as part of the format string.
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Print the symbolic HEAD ref and remember it in g_head.

    The current bookmark is used when there is one; otherwise a bookmark
    named after the current branch cur ('default' becomes 'master') is
    faked and registered in bmarks. Nothing is printed when no changeset
    can be found for the fake bookmark.
    """
    global g_head, bmarks

    current = bookmarks.readcurrent(repo)
    if current:
        ctx = repo[current]
    else:
        # fake bookmark from current branch
        current = cur
        ctx = repo['.'] or repo['tip']
        if not ctx:
            return
        if current == 'default':
            current = 'master'
        bmarks[current] = ctx

    print("@refs/heads/%s HEAD" % current)
    g_head = (current, ctx)
def do_list(parser):
    """Answer the 'list' command: HEAD, branches, bookmarks and tags."""
    global branches, bmarks, mode, track_branches

    repo = parser.repo
    for bookmark, bnode in bookmarks.listbookmarks(repo).iteritems():
        bmarks[bookmark] = repo[bnode]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        # Only branches that still have heads are listed.
        for branch in repo.branchmap():
            branch_heads = repo.branchheads(branch)
            if len(branch_heads):
                branches[branch] = branch_heads

        for branch in branches:
            print("? refs/heads/branches/%s" % branch)

    for bookmark in bmarks:
        print("? refs/heads/%s" % bookmark)

    for tag, _tagnode in repo.tagslist():
        if tag != 'tip':
            print("? refs/tags/%s" % tag)

    # An empty line terminates the answer.
    print("")
def do_import(parser):
    """Answer a batch of 'import <ref>' commands with a fast-import stream."""
    repo = parser.repo

    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    # Order matters: the branches prefix is itself below 'refs/heads/'.
    exporters = (
        ('refs/heads/branches/', export_branch),
        ('refs/heads/', export_bookmark),
        ('refs/tags/', export_tag),
    )

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if ref == 'HEAD':
            export_head(repo)
        else:
            for ref_prefix, exporter in exporters:
                if ref.startswith(ref_prefix):
                    exporter(repo, ref[len(ref_prefix):])
                    break

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Consume a 'blob' command and keep its data in blob_marks by mark."""
    global blob_marks

    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add the files touched by repo[p1] to files unless already listed.

    Only files still present in that changeset's manifest are added, each
    as {'ctx': <file context>}. p2 is accepted but not used.
    """
    for fname in repo[p1].files():
        if fname in files:
            continue
        if fname not in repo[p1].manifest():
            continue
        files[fname] = { 'ctx' : repo[p1][fname] }
def parse_commit(parser):
    """Consume a 'commit' command and create the matching hg changeset.

    Reads mark, author, committer, message, optional 'from'/'merge' lines
    and the file commands, commits the result through a memctx, and records
    the new node in parsed_refs and its revision in the global marks.
    """
    global marks, blob_marks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    # File commands run until the empty line that ends the commit.
    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            # mark_ref looks like ':<n>' and refers to an earlier blob.
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # Pass the line as an argument so a '%' in it cannot break the
            # message formatting.
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx: IOError is raised for deleted files.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    # A missing parent is represented by 20 NUL bytes.
    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    # Check if the ref is supposed to be a named branch
    if ref.startswith('refs/heads/branches/'):
        extra['branch'] = ref[len('refs/heads/branches/'):]

    if mode == 'hg':
        # hg-git compatibility: metadata follows a '--HG--' marker in the
        # message as 'key : value' lines and is stripped from the message.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    # Commit with mercurial's encoding temporarily forced to utf-8.
    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command.

    A reset directly followed by a commit is handed to parse_commit(); a
    reset with a 'from' line records the referenced node in parsed_refs;
    any other reset is ignored.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        source_mark = parser.get_mark()
        parser.next()

        parsed_refs[ref] = parser.repo.changelog.node(mark_to_rev(source_mark))
def parse_tag(parser):
    """Consume a 'tag' command; the parsed values are not used."""
    parser[1]               # tag name
    parser.next()
    parser.get_mark()       # 'from' mark
    parser.next()
    parser.get_author()     # tagger
    parser.next()
    parser.get_data()       # tag message
    parser.next()

    # nothing to do
def do_export(parser):
    """Answer the 'export' command.

    Parses the incoming fast-export stream into hg changesets, creates the
    tags, pushes to the remote peer when there is one, updates the
    bookmarks, and prints an 'ok'/'error' status line per handled ref.
    """
    global parsed_refs, bmarks, peer

    p_bmarks = []

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            parse_tag(parser)
        elif parser.check('feature'):
            pass
        else:
            # Pass the line as an argument so a '%' in it cannot break the
            # message formatting.
            die('unhandled export command: %s', line)

    for ref, node in parsed_refs.iteritems():
        if ref.startswith('refs/heads/branches'):
            print("ok %s" % ref)
        elif ref.startswith('refs/heads/'):
            # Bookmarks are handled after the push below.
            bmark = ref[len('refs/heads/'):]
            p_bmarks.append((bmark, node))
            continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            if mode == 'git':
                msg = 'Added tag %s for changeset %s' % (tag, hghex(node[:6]))
                parser.repo.tag([tag], node, msg, False, None, {})
            else:
                parser.repo.tag([tag], node, None, True, None, {})
            print("ok %s" % ref)
        else:
            # transport-helper/fast-export bugs
            continue

    if peer:
        parser.repo.push(peer, force=force_push)

    # handle bookmarks
    for bmark, node in p_bmarks:
        ref = 'refs/heads/' + bmark
        new = hghex(node)

        if bmark in bmarks:
            old = bmarks[bmark].hex()
        else:
            old = ''

        if bmark == 'master' and 'master' not in parser.repo._bookmarks:
            # fake bookmark
            pass
        elif bookmarks.pushbookmark(parser.repo, bmark, old, new):
            # updated locally
            pass
        else:
            print("error %s" % ref)
            continue

        if peer:
            if not peer.pushkey('bookmarks', bmark, old, new):
                print("error %s" % ref)
                continue

        print("ok %s" % ref)

    # An empty line terminates the answer.
    print("")
def fix_path(alias, repo, orig_url):
    """Rewrite git's remote.<alias>.url if it differs from repo's own URL.

    The two URLs are compared through their util.url string form; when
    they differ, 'git config' stores 'hg::<repo url>' for the alias.
    """
    repo_url = util.url(repo.url())
    given_url = util.url(orig_url)
    if str(given_url) != str(repo_url):
        subprocess.call(['git', 'config', 'remote.%s.url' % alias,
                         "hg::%s" % repo_url])
def main(args):
    """Run the remote helper: args[1] is the remote alias, args[2] the URL.

    Sets up the module-level state, opens the repository, and then answers
    the commands arriving on stdin until an empty line is read.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches, force_push, is_tmp

    # Define 'marks' before anything can fail, so that the exit hook finds
    # the name even when start-up aborts early.
    marks = None

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    force_push = True

    try:
        # get_config() returns the raw output, hence the trailing newline.
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
        if get_config('remote-hg.force-push') == 'false\n':
            force_push = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # An alias that is just the URL behind a four character prefix is
    # treated as temporary; its state directory is named after a hash.
    if alias[4:] == url:
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # Pass the line as an argument so a '%' in it cannot break the
            # message formatting.
            die('unhandled command: %s', line)
        sys.stdout.flush()
def bye():
    """Exit hook: store the marks, or remove a temporary state directory.

    Does nothing when no marks object exists. The name is looked up through
    globals() because main() may have failed before defining it, and a
    NameError here would only add a second traceback at exit.
    """
    current_marks = globals().get('marks')
    if not current_marks:
        return
    if not is_tmp:
        current_marks.store()
    else:
        shutil.rmtree(dirname)
# Register bye() to run at interpreter exit.
atexit.register(bye)
# Run the helper with the command line; main()'s return value is passed to
# sys.exit() as the exit status.
sys.exit(main(sys.argv))