remote-hg: add compat for hg-git author fixes
[alt-git.git] / contrib / remote-helpers / git-remote-hg
blob9db4b7e59c746f2bb8ce823b12dbaa48e11196b4
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial import hg, ui, bookmarks, context, util, encoding
14 import re
15 import sys
16 import os
17 import json
18 import shutil
19 import subprocess
20 import urllib
23 # If you want to switch to hg-git compatibility mode:
24 # git config --global remote-hg.hg-git-compat true
26 # git:
27 # Sensible defaults for git.
28 # hg bookmarks are exported as git branches, hg branches are prefixed
29 # with 'branches/'.
31 # hg:
32 # Emulate hg-git.
33 # Only hg bookmarks are exported as git branches.
34 # Commits are modified to preserve hg information and allow bidirectionality.
# Patterns used to pick author identities apart.  They are written as raw
# strings so that regex escapes such as \w and \d are never interpreted as
# string escapes.

# Leading run of characters that contains no angle bracket.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by '<mail>' and nothing else.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]+)>$')
# Name, '<mail', and optionally '>' followed by trailing text.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.+?)(?:>(.+)?)?$')
# Stream line: keyword, optional name, '<mail>', timestamp, signed timezone.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.+)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <msg>' to stderr and exit with status 1.

    msg is %-formatted with args only when args are given, so a message
    that was already formatted by the caller and happens to contain a
    literal '%' is printed as-is instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <msg>' to stderr.

    msg is %-formatted with args only when args are given, so callers that
    pass an already formatted message containing a literal '%' do not
    trigger a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gitmode(flags):
    """Return the git file mode string for a set of Mercurial file flags.

    'l' (symlink) takes precedence over 'x' (executable); anything else is
    reported as a regular file.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format an offset in seconds as a '+HHMM' / '-HHMM' timezone string.

    The sign is inverted: a positive tz yields a '-' result and a
    non-positive tz yields '+'.  Hours and minutes are computed from the
    absolute value so that fractional-hour offsets are not distorted by
    floor division of a negative number (12600 must give '-0330', not
    '-0430').
    """
    sign = '-' if tz > 0 else '+'
    hours, remainder = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Translate a git file mode string into Mercurial file flags.

    Returns 'x' for executables, 'l' for symlinks and '' otherwise.
    Leading zeros are ignored so both '100755' (the six-digit form used in
    fast-import streams) and the previously accepted '0100755' match; the
    short '755' form is accepted as well.
    """
    m = {'100755': 'x', '755': 'x', '120000': 'l'}
    return m.get(mode.lstrip('0'), '')
class Marks:
    """Persistent mapping between revisions and marks, plus per-ref tips.

    State is kept in a JSON file at `path` with the keys 'tips', 'marks'
    and 'last-mark'.  `marks` maps str(rev) -> mark; `rev_marks` is the
    reverse mapping mark -> rev.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read the state from self.path; do nothing if the file is absent."""
        if not os.path.exists(self.path):
            return

        # Use a context manager so the file handle is closed deterministically.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings, so revisions are converted back to int.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serialisable state as a plain dictionary."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for rev (KeyError if there is none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for mark (KeyError if there is none)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark number, record it for rev and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for rev in both directions."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if rev already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the stored tip for branch, or 0 if none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Remember tip as the last exported revision of branch."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    The current (stripped) line is kept in self.line; most helpers inspect
    that line and the caller advances explicitly with next().
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace removed."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals separator."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse the current line as an author/committer/tagger line.

        Returns (user, timestamp, tz) where tz is the offset in seconds
        with the sign inverted relative to the '+HHMM' text, or None when
        the line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None

        ex = None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Undo the ' ext:(...)' suffix that fixup_user_hg appends.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            user = name

        if ex:
            user += ex

        # Split HHMM on the absolute value: floor division / modulo on a
        # negative number would turn -0330 into -4h +70min.
        tz = int(tz)
        sign = -1 if tz < 0 else 1
        hours, minutes = divmod(abs(tz), 100)
        tz = sign * (hours * 3600 + minutes * 60)
        return (user, int(date), -tz)
185 def export_file(fc):
186 d = fc.data()
187 print "M %s inline %s" % (gitmode(fc.flags()), fc.path())
188 print "data %d" % len(d)
189 print d
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of ctx with the manifest of repo[parent].

    Returns (changed, removed): `changed` is the set of files that are new
    or whose flags or manifest entry differ, `removed` is the set of files
    present only in the parent manifest.
    """
    current = ctx.manifest()
    # Work on a copy: entries are deleted as they are matched, so whatever
    # is left at the end exists only in the parent.
    leftover = repo[parent].manifest().copy()

    added = set()
    modified = set()
    for path in current:
        if path not in leftover:
            added.add(path)
            continue
        unchanged = (current.flags(path) == leftover.flags(path)
                     and current[path] == leftover[path])
        if not unchanged:
            modified.add(path)
        del leftover[path]

    removed = set(leftover.keys())
    return added | modified, removed
def fixup_user_git(user):
    """Split a user string into (name, mail) using the git-mode rules.

    Double quotes are dropped first.  When the string has the 'name <mail>'
    shape both parts are returned (name may be None); otherwise only the
    text before any angle bracket is used as the name and mail is None.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split a user string into (name, mail) using the hg-git compatible rules.

    Text following the closing '>' is URL-quoted and appended to the name
    as ' ext:(...)'.  When the string has no '<', the whole sanitized
    string is the name, and also the mail if it contains '@'.
    """
    def sanitize(name):
        # stole this from hg-git
        return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))

    parsed = AUTHOR_HG_RE.match(user)
    if not parsed:
        name = sanitize(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = sanitize(parsed.group(1))
    mail = sanitize(parsed.group(2))
    trailer = parsed.group(3)
    if trailer:
        name += ' ext:(' + urllib.quote(trailer) + ')'
    return (name, mail)
def fixup_user(user):
    """Return user normalised to 'name <mail>'.

    The split is done by fixup_user_git or fixup_user_hg depending on the
    global mode; a missing name or mail is replaced by the global bad_name
    or bad_mail placeholder.
    """
    if mode == 'git':
        name, mail = fixup_user_git(user)
    else:
        name, mail = fixup_user_hg(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Return the Mercurial repository object to operate on.

    A local url is opened directly.  Otherwise a clone under
    <dirname>/clone is used: it is created on first use, or opened and
    updated with a forced pull.  In the non-local case the global `peer`
    is set.
    """
    global peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')

    if hg.islocal(url):
        return hg.repository(myui, url)

    local_path = os.path.join(dirname, 'clone')
    if os.path.exists(local_path):
        repo = hg.repository(myui, local_path)
        peer = hg.peer(myui, {}, url)
        repo.pull(peer, heads=None, force=True)
    else:
        peer, dstpeer = hg.clone(myui, {}, url, local_path, update=False, pull=True)
        repo = dstpeer.local()

    return repo
def rev_to_mark(rev):
    """Look up the mark recorded for rev in the global marks table."""
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision recorded for mark in the global marks table."""
    rev = marks.to_rev(mark)
    return rev
287 def export_ref(repo, name, kind, head):
288 global prefix, marks, mode
290 ename = '%s/%s' % (kind, name)
291 tip = marks.get_tip(ename)
293 # mercurial takes too much time checking this
294 if tip and tip == head.rev():
295 # nothing to do
296 return
297 revs = repo.revs('%u:%u' % (tip, head))
298 count = 0
300 revs = [rev for rev in revs if not marks.is_marked(rev)]
302 for rev in revs:
304 c = repo[rev]
305 (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
306 rev_branch = extra['branch']
308 author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
309 if 'committer' in extra:
310 user, time, tz = extra['committer'].rsplit(' ', 2)
311 committer = "%s %s %s" % (user, time, gittz(int(tz)))
312 else:
313 committer = author
315 parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]
317 if len(parents) == 0:
318 modified = c.manifest().keys()
319 removed = []
320 else:
321 modified, removed = get_filechanges(repo, c, parents[0])
323 if mode == 'hg':
324 extra_msg = ''
326 if rev_branch != 'default':
327 extra_msg += 'branch : %s\n' % rev_branch
329 renames = []
330 for f in c.files():
331 if f not in c.manifest():
332 continue
333 rename = c.filectx(f).renamed()
334 if rename:
335 renames.append((rename[0], f))
337 for e in renames:
338 extra_msg += "rename : %s => %s\n" % e
340 for key, value in extra.iteritems():
341 if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
342 continue
343 else:
344 extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))
346 desc += '\n'
347 if extra_msg:
348 desc += '\n--HG--\n' + extra_msg
350 if len(parents) == 0 and rev:
351 print 'reset %s/%s' % (prefix, ename)
353 print "commit %s/%s" % (prefix, ename)
354 print "mark :%d" % (marks.get_mark(rev))
355 print "author %s" % (author)
356 print "committer %s" % (committer)
357 print "data %d" % (len(desc))
358 print desc
360 if len(parents) > 0:
361 print "from :%s" % (rev_to_mark(parents[0]))
362 if len(parents) > 1:
363 print "merge :%s" % (rev_to_mark(parents[1]))
365 for f in modified:
366 export_file(c.filectx(f))
367 for f in removed:
368 print "D %s" % (f)
369 print
371 count += 1
372 if (count % 100 == 0):
373 print "progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs))
374 print "#############################################################"
376 # make sure the ref is updated
377 print "reset %s/%s" % (prefix, ename)
378 print "from :%u" % rev_to_mark(rev)
379 print
381 marks.set_tip(ename, rev)
def export_tag(repo, tag):
    """Export the history leading to tag under the 'tags' namespace."""
    head = repo[tag]
    export_ref(repo, tag, 'tags', head)
def export_bookmark(repo, bmark):
    """Export the history leading to bookmark bmark under 'bookmarks'.

    The head is taken from the global bmarks table.
    """
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export the history leading to the tip of branch under 'branches'."""
    branch_tip = get_branch_tip(repo, branch)
    export_ref(repo, branch, 'branches', repo[branch_tip])
def export_head(repo):
    """Export the bookmark stored in the global g_head as (name, node)."""
    head_name, head_node = g_head[0], g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
399 def do_capabilities(parser):
400 global prefix, dirname
402 print "import"
403 print "export"
404 print "refspec refs/heads/branches/*:%s/branches/*" % prefix
405 print "refspec refs/heads/*:%s/bookmarks/*" % prefix
406 print "refspec refs/tags/*:%s/tags/*" % prefix
408 path = os.path.join(dirname, 'marks-git')
410 if os.path.exists(path):
411 print "*import-marks %s" % path
412 print "*export-marks %s" % path
414 print
def get_branch_tip(repo, branch):
    """Return the head to use for branch, or None if no heads are recorded.

    Heads are read from the global branches table.  With several heads a
    warning is printed and repo.branchtip() is used when available;
    otherwise the first recorded head is returned.
    """
    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    several = len(heads) > 1
    if several:
        warn("Branch '%s' has more than one head, consider merging" % branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
432 def list_head(repo, cur):
433 global g_head
435 head = bookmarks.readcurrent(repo)
436 if not head:
437 return
438 node = repo[head]
439 print "@refs/heads/%s HEAD" % head
440 g_head = (head, node)
442 def do_list(parser):
443 global branches, bmarks, mode
445 repo = parser.repo
446 for branch in repo.branchmap():
447 heads = repo.branchheads(branch)
448 if len(heads):
449 branches[branch] = heads
451 for bmark, node in bookmarks.listbookmarks(repo).iteritems():
452 bmarks[bmark] = repo[node]
454 cur = repo.dirstate.branch()
456 list_head(repo, cur)
458 if mode != 'hg':
459 for branch in branches:
460 print "? refs/heads/branches/%s" % branch
462 for bmark in bmarks:
463 print "? refs/heads/%s" % bmark
465 for tag, node in repo.tagslist():
466 if tag == 'tip':
467 continue
468 print "? refs/tags/%s" % tag
470 print
472 def do_import(parser):
473 repo = parser.repo
475 path = os.path.join(dirname, 'marks-git')
477 print "feature done"
478 if os.path.exists(path):
479 print "feature import-marks=%s" % path
480 print "feature export-marks=%s" % path
481 sys.stdout.flush()
483 tmp = encoding.encoding
484 encoding.encoding = 'utf-8'
486 # lets get all the import lines
487 while parser.check('import'):
488 ref = parser[1]
490 if (ref == 'HEAD'):
491 export_head(repo)
492 elif ref.startswith('refs/heads/branches/'):
493 branch = ref[len('refs/heads/branches/'):]
494 export_branch(repo, branch)
495 elif ref.startswith('refs/heads/'):
496 bmark = ref[len('refs/heads/'):]
497 export_bookmark(repo, bmark)
498 elif ref.startswith('refs/tags/'):
499 tag = ref[len('refs/tags/'):]
500 export_tag(repo, tag)
502 parser.next()
504 encoding.encoding = tmp
506 print 'done'
def parse_blob(parser):
    """Consume a blob command and store its data in blob_marks by mark."""
    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add the files touched by changeset p1 to files, in place.

    Entries already in files are left alone, and files that are not in
    p1's manifest are skipped.  Each added entry is {'ctx': <file context>}.
    p2 is accepted but not used.
    """
    for path in repo[p1].files():
        if path in files:
            continue
        if path not in repo[p1].manifest():
            continue
        files[path] = { 'ctx' : repo[p1][path] }
def parse_commit(parser):
    """Consume a commit command and create the matching hg changeset.

    Reads mark, author, committer, message, optional 'from' / 'merge'
    parents and the 'M' / 'D' file commands, commits the result through
    repo.commitctx(), then records the new node in parsed_refs under the
    commit's ref and the new revision in marks under the commit's mark.
    Octopus merges and unknown file commands abort via die().
    """
    global marks, blob_marks, bmarks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            # Bound the split so that a path containing spaces stays whole.
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s' % line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx: returns the file context for f, or
        # raises IOError to signal that f was deleted.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        # Read back the metadata trailer that follows the '--HG--' marker.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            # Split only on the first ' : ': 'extra : key : value' lines
            # carry a second separator that is handled below.
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node

    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a reset command.

    A reset directly followed by a commit is handed to parse_commit.  A
    reset with a 'from' line records the referenced node in parsed_refs;
    a reset without one is ignored.
    """
    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
        return
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        parsed_refs[ref] = parser.repo.changelog.node(mark_to_rev(from_mark))
def parse_tag(parser):
    """Consume a tag command; the parsed values are discarded."""
    # Each field is still read so the parser ends up past the tag block.
    _name = parser[1]
    parser.next()
    _from_mark = parser.get_mark()
    parser.next()
    _tagger = parser.get_author()
    parser.next()
    _data = parser.get_data()
    parser.next()

    # nothing to do
663 def do_export(parser):
664 global parsed_refs, bmarks, peer
666 parser.next()
668 for line in parser.each_block('done'):
669 if parser.check('blob'):
670 parse_blob(parser)
671 elif parser.check('commit'):
672 parse_commit(parser)
673 elif parser.check('reset'):
674 parse_reset(parser)
675 elif parser.check('tag'):
676 parse_tag(parser)
677 elif parser.check('feature'):
678 pass
679 else:
680 die('unhandled export command: %s' % line)
682 for ref, node in parsed_refs.iteritems():
683 if ref.startswith('refs/heads/branches'):
684 pass
685 elif ref.startswith('refs/heads/'):
686 bmark = ref[len('refs/heads/'):]
687 if bmark in bmarks:
688 old = bmarks[bmark].hex()
689 else:
690 old = ''
691 if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
692 continue
693 elif ref.startswith('refs/tags/'):
694 tag = ref[len('refs/tags/'):]
695 parser.repo.tag([tag], node, None, True, None, {})
696 print "ok %s" % ref
698 print
700 if peer:
701 parser.repo.push(peer, force=False)
def main(args):
    """Entry point: args[1] is the remote alias and args[2] its URL.

    Sets up the global state, opens the repository and serves commands
    read from stdin until an empty line.  Marks are stored on exit unless
    the remote is anonymous (alias[4:] equals the URL), in which case the
    working directory of the helper is removed instead.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name

    alias = args[1]
    url = args[2]
    peer = None

    cmd = ['git', 'config', '--get', 'remote-hg.hg-git-compat']
    try:
        hg_git_compat = subprocess.check_output(cmd) == 'true\n'
    except subprocess.CalledProcessError:
        hg_git_compat = False

    if hg_git_compat:
        mode, bad_mail, bad_name = 'hg', 'none@none', ''
    else:
        mode, bad_mail, bad_name = 'git', 'unknown', 'Unknown'

    is_tmp = alias[4:] == url
    if is_tmp:
        alias = util.sha1(alias).hexdigest()

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()

    if is_tmp:
        shutil.rmtree(dirname)
    else:
        marks.store()
# Run the helper with the process arguments and exit with main()'s result.
status = main(sys.argv)
sys.exit(status)