remote-hg: add support for hg-git compat mode
[git/mingw.git] / contrib / remote-helpers / git-remote-hg
blobd5857560ed9ea8e6731605f4ae9cd29d7944d5cf
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
import json
import os
import re
import shutil
import subprocess
import sys
import urllib

from mercurial import hg, ui, bookmarks, context, util, encoding
22 # If you want to switch to hg-git compatibility mode:
23 # git config --global remote-hg.hg-git-compat true
25 # git:
26 # Sensible defaults for git.
27 # hg bookmarks are exported as git branches, hg branches are prefixed
28 # with 'branches/'.
30 # hg:
31 # Emulate hg-git.
32 # Only hg bookmarks are exported as git branches.
33 # Commits are modified to preserve hg information and allow bidirectionality.
# Raw strings keep regex escapes such as \w and \d out of Python's own
# string-escape processing.

# Leading part of a user string, up to the first angle bracket.
NAME_RE = re.compile(r'^([^<>]+)')
# 'Name <email>' where the name part (and the separating space) is optional.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]+)>$')
# 'keyword [name ]<email> timestamp +HHMM', as matched against stream lines
# by Parser.get_author().
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.+)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an 'ERROR: ...' line to stderr and exit with status 1.

    msg is a %-style format string and args are the values interpolated
    into it.
    """
    text = msg % args
    sys.stderr.write('ERROR: %s\n' % text)
    sys.exit(1)
def warn(msg, *args):
    """Write a 'WARNING: ...' line to stderr without terminating.

    msg is a %-style format string and args are the values interpolated
    into it.
    """
    text = msg % args
    sys.stderr.write('WARNING: %s\n' % text)
def gitmode(flags):
    """Translate Mercurial file flags into a git file mode string.

    'l' (symlink) takes precedence over 'x' (executable); anything else is
    a regular file.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format a timezone offset in seconds as a git '+HHMM'/'-HHMM' string.

    The sign is inverted: a positive tz yields a '-' offset and a negative
    tz a '+' offset (export_ref passes the tz value read from the
    Mercurial changelog entry).

    The magnitude is split with divmod on the absolute value, so offsets
    that include minutes come out right for both signs. Floor division
    and modulo on a negative number would turn 12600 into '-0430' instead
    of '-0330'.
    """
    sign = '-' if tz > 0 else '+'
    hours, remainder = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Translate a git file mode string into Mercurial file flags.

    Returns 'x' for executables, 'l' for symlinks and '' for anything else.

    Leading zeros are stripped before the lookup so that both the
    six-digit form found in fast-export streams ('100755') and a
    zero-padded form ('0100755') are recognised.
    """
    m = { '100755': 'x', '120000': 'l' }
    return m.get(mode.lstrip('0'), '')
class Marks:
    """Persistent bookkeeping of revision <-> mark pairs and per-ref tips.

    State is kept in a JSON file at `path` with three keys: 'tips' (ref
    name -> last exported revision), 'marks' (str(revision) -> mark) and
    'last-mark' (the most recently used mark number). A reverse map,
    mark -> revision, is rebuilt in memory on load.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read the state from self.path; a missing file leaves it empty."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings; the reverse map holds int revisions.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serialisable form of the state."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for `rev` (KeyError if there is none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for `mark` (KeyError if unknown)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark number for `rev` and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse map in sync so to_rev() works in the same run,
        # not only after the state has been stored and loaded again.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for `rev`."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Tell whether `rev` already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the stored tip for `branch`, or 0 when none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Remember `tip` as the last exported revision of `branch`."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for commands arriving on standard input.

    The current line is held in self.line, with surrounding whitespace
    stripped; the helper methods interpret that line.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and strip surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals `separator`.

        A fresh line is read after each yield, so anything the consumer
        read in the meantime has already been consumed.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data command.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_seconds(tz):
        """Convert a '+HHMM'/'-HHMM' string to signed seconds.

        The sign is applied after splitting the magnitude, so that
        '-0330' yields -12600. Floor division and modulo on the negative
        number would yield -10200.
        """
        value = int(tz)
        sign = -1 if value < 0 else 1
        hours, minutes = divmod(abs(value), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current line with RAW_AUTHOR_RE.

        Returns (user, timestamp, tz), where tz is the negated offset in
        seconds, or None when the line does not match.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()

        # 'unknown' is the placeholder fixup_user() emits for a missing mail
        if email != 'unknown':
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            user = name

        return (user, int(date), -self._tz_seconds(tz))
172 def export_file(fc):
173 d = fc.data()
174 print "M %s inline %s" % (gitmode(fc.flags()), fc.path())
175 print "data %d" % len(d)
176 print d
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of ctx against that of repo[parent].

    Returns (changed, removed): `changed` is the set of files that are new
    in ctx or differ in flags or content entry; `removed` is the set of
    files present only in the parent. The parent manifest is copied first,
    so the original is not modified.
    """
    current = ctx.manifest()
    leftover = repo[parent].manifest().copy()

    added = set()
    modified = set()

    for fname in current:
        if fname not in leftover:
            added.add(fname)
            continue
        flags_differ = current.flags(fname) != leftover.flags(fname)
        if flags_differ or current[fname] != leftover[fname]:
            modified.add(fname)
        # whatever is still in `leftover` after the loop was removed
        del leftover[fname]

    removed = set(leftover.keys())

    return added | modified, removed
def fixup_user(user):
    """Normalise a user string into the 'Name <email>' form.

    Double quotes are removed first. A missing name becomes 'Unknown' and
    a missing email becomes 'unknown'.
    """
    cleaned = user.replace('"', '')
    name = None
    mail = None

    full = AUTHOR_RE.match(cleaned)
    if full:
        name = full.group(1)
        mail = full.group(2).strip()
    else:
        # no <email> part: take whatever precedes any angle bracket as name
        partial = NAME_RE.match(cleaned)
        if partial:
            name = partial.group(1).strip()

    return '%s <%s>' % (name or 'Unknown', mail or 'unknown')
def get_repo(url, alias):
    """Open the Mercurial repository behind `url`.

    A local url is opened directly. Otherwise a clone is kept under
    <dirname>/clone: it is created on first use and pulled into on later
    runs. In both non-local cases the global `peer` is set.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')

    if hg.islocal(url):
        return hg.repository(myui, url)

    local_path = os.path.join(dirname, 'clone')
    if os.path.exists(local_path):
        # existing clone: open it and bring it up to date
        repo = hg.repository(myui, local_path)
        peer = hg.peer(myui, {}, url)
        repo.pull(peer, heads=None, force=True)
    else:
        peer, dstpeer = hg.clone(myui, {}, url, local_path, update=False, pull=True)
        repo = dstpeer.local()

    return repo
def rev_to_mark(rev):
    """Look up the mark recorded for `rev` in the global marks table."""
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision recorded for `mark` in the global marks table."""
    rev = marks.to_rev(mark)
    return rev
244 def export_ref(repo, name, kind, head):
245 global prefix, marks, mode
247 ename = '%s/%s' % (kind, name)
248 tip = marks.get_tip(ename)
250 # mercurial takes too much time checking this
251 if tip and tip == head.rev():
252 # nothing to do
253 return
254 revs = repo.revs('%u:%u' % (tip, head))
255 count = 0
257 revs = [rev for rev in revs if not marks.is_marked(rev)]
259 for rev in revs:
261 c = repo[rev]
262 (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
263 rev_branch = extra['branch']
265 author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
266 if 'committer' in extra:
267 user, time, tz = extra['committer'].rsplit(' ', 2)
268 committer = "%s %s %s" % (user, time, gittz(int(tz)))
269 else:
270 committer = author
272 parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]
274 if len(parents) == 0:
275 modified = c.manifest().keys()
276 removed = []
277 else:
278 modified, removed = get_filechanges(repo, c, parents[0])
280 if mode == 'hg':
281 extra_msg = ''
283 if rev_branch != 'default':
284 extra_msg += 'branch : %s\n' % rev_branch
286 renames = []
287 for f in c.files():
288 if f not in c.manifest():
289 continue
290 rename = c.filectx(f).renamed()
291 if rename:
292 renames.append((rename[0], f))
294 for e in renames:
295 extra_msg += "rename : %s => %s\n" % e
297 for key, value in extra.iteritems():
298 if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
299 continue
300 else:
301 extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))
303 desc += '\n'
304 if extra_msg:
305 desc += '\n--HG--\n' + extra_msg
307 if len(parents) == 0 and rev:
308 print 'reset %s/%s' % (prefix, ename)
310 print "commit %s/%s" % (prefix, ename)
311 print "mark :%d" % (marks.get_mark(rev))
312 print "author %s" % (author)
313 print "committer %s" % (committer)
314 print "data %d" % (len(desc))
315 print desc
317 if len(parents) > 0:
318 print "from :%s" % (rev_to_mark(parents[0]))
319 if len(parents) > 1:
320 print "merge :%s" % (rev_to_mark(parents[1]))
322 for f in modified:
323 export_file(c.filectx(f))
324 for f in removed:
325 print "D %s" % (f)
326 print
328 count += 1
329 if (count % 100 == 0):
330 print "progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs))
331 print "#############################################################"
333 # make sure the ref is updated
334 print "reset %s/%s" % (prefix, ename)
335 print "from :%u" % rev_to_mark(rev)
336 print
338 marks.set_tip(ename, rev)
def export_tag(repo, tag):
    """Export the history leading to `tag` under the 'tags' namespace."""
    head = repo[tag]
    export_ref(repo, tag, 'tags', head)
def export_bookmark(repo, bmark):
    """Export the bookmark `bmark`, using the head cached in `bmarks`."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export `branch` up to the tip chosen by get_branch_tip()."""
    head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', head)
def export_head(repo):
    """Export the bookmark remembered in the global g_head by list_head()."""
    global g_head
    head_name, head_ctx = g_head[0], g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_ctx)
356 def do_capabilities(parser):
357 global prefix, dirname
359 print "import"
360 print "export"
361 print "refspec refs/heads/branches/*:%s/branches/*" % prefix
362 print "refspec refs/heads/*:%s/bookmarks/*" % prefix
363 print "refspec refs/tags/*:%s/tags/*" % prefix
365 path = os.path.join(dirname, 'marks-git')
367 if os.path.exists(path):
368 print "*import-marks %s" % path
369 print "*export-marks %s" % path
371 print
def get_branch_tip(repo, branch):
    """Return the tip of `branch`, or None when the branch has no heads.

    A warning is printed when the branch has more than one head. In that
    case repo.branchtip() is used if this Mercurial version provides it;
    otherwise the first recorded head is returned.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if (len(heads) > 1):
        # Pass the branch as a format argument: warn() applies '%' itself,
        # so a pre-formatted message would break on names containing '%'.
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
389 def list_head(repo, cur):
390 global g_head
392 head = bookmarks.readcurrent(repo)
393 if not head:
394 return
395 node = repo[head]
396 print "@refs/heads/%s HEAD" % head
397 g_head = (head, node)
399 def do_list(parser):
400 global branches, bmarks, mode
402 repo = parser.repo
403 for branch in repo.branchmap():
404 heads = repo.branchheads(branch)
405 if len(heads):
406 branches[branch] = heads
408 for bmark, node in bookmarks.listbookmarks(repo).iteritems():
409 bmarks[bmark] = repo[node]
411 cur = repo.dirstate.branch()
413 list_head(repo, cur)
415 if mode != 'hg':
416 for branch in branches:
417 print "? refs/heads/branches/%s" % branch
419 for bmark in bmarks:
420 print "? refs/heads/%s" % bmark
422 for tag, node in repo.tagslist():
423 if tag == 'tip':
424 continue
425 print "? refs/tags/%s" % tag
427 print
429 def do_import(parser):
430 repo = parser.repo
432 path = os.path.join(dirname, 'marks-git')
434 print "feature done"
435 if os.path.exists(path):
436 print "feature import-marks=%s" % path
437 print "feature export-marks=%s" % path
438 sys.stdout.flush()
440 tmp = encoding.encoding
441 encoding.encoding = 'utf-8'
443 # lets get all the import lines
444 while parser.check('import'):
445 ref = parser[1]
447 if (ref == 'HEAD'):
448 export_head(repo)
449 elif ref.startswith('refs/heads/branches/'):
450 branch = ref[len('refs/heads/branches/'):]
451 export_branch(repo, branch)
452 elif ref.startswith('refs/heads/'):
453 bmark = ref[len('refs/heads/'):]
454 export_bookmark(repo, bmark)
455 elif ref.startswith('refs/tags/'):
456 tag = ref[len('refs/tags/'):]
457 export_tag(repo, tag)
459 parser.next()
461 encoding.encoding = tmp
463 print 'done'
def parse_blob(parser):
    """Consume a 'blob' command and keep its data in blob_marks by mark."""
    global blob_marks

    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add the files touched by the first parent to `files`.

    For every file listed by repo[p1].files() that is not in `files` yet
    and still exists in that parent's manifest, an entry carrying the
    parent's file context is added. `p2` is accepted but not used.
    """
    parent = repo[p1]
    for fname in parent.files():
        if fname in files:
            continue
        if fname not in parent.manifest():
            continue
        files[fname] = { 'ctx' : parent[fname] }
def parse_commit(parser):
    """Consume a 'commit' command and create the Mercurial changeset.

    Reads the mark, author, committer, message, optional 'from'/'merge'
    parents and the file commands, then commits an in-memory context. The
    resulting node is recorded in parsed_refs for the ref, and the
    revision is recorded against the commit mark.

    In 'hg' mode a '--HG--' trailer in the message is parsed back into
    renames, branch and extra fields, and stripped from the message.
    """
    global marks, blob_marks, bmarks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            # Limit the split so that a path containing spaces stays whole.
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx: IOError signals a deleted file.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for e in tmp.split('\n'):
                # Skip lines that do not follow the 'key : value' layout.
                if ' : ' not in e:
                    continue
                # Split once only: 'extra : k : v' lines carry a second
                # separator that belongs to the value.
                k, v = e.split(' : ', 1)
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node

    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command.

    If a 'commit' follows directly, it is handled by parse_commit().
    Otherwise, when a 'from' line is present, the ref is pointed at the
    node recorded for that mark in parsed_refs.
    """
    ref = parser[1]
    parser.next()

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = parser.repo.changelog.node(mark_to_rev(from_mark))
def parse_tag(parser):
    """Consume a 'tag' command from the stream and discard its contents."""
    # Each field is still parsed so the reader ends up past the command,
    # but none of the values is used.
    parser[1]
    parser.next()
    parser.get_mark()
    parser.next()
    parser.get_author()
    parser.next()
    parser.get_data()
    parser.next()

    # nothing to do
620 def do_export(parser):
621 global parsed_refs, bmarks, peer
623 parser.next()
625 for line in parser.each_block('done'):
626 if parser.check('blob'):
627 parse_blob(parser)
628 elif parser.check('commit'):
629 parse_commit(parser)
630 elif parser.check('reset'):
631 parse_reset(parser)
632 elif parser.check('tag'):
633 parse_tag(parser)
634 elif parser.check('feature'):
635 pass
636 else:
637 die('unhandled export command: %s' % line)
639 for ref, node in parsed_refs.iteritems():
640 if ref.startswith('refs/heads/branches'):
641 pass
642 elif ref.startswith('refs/heads/'):
643 bmark = ref[len('refs/heads/'):]
644 if bmark in bmarks:
645 old = bmarks[bmark].hex()
646 else:
647 old = ''
648 if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
649 continue
650 elif ref.startswith('refs/tags/'):
651 tag = ref[len('refs/tags/'):]
652 parser.repo.tag([tag], node, None, True, None, {})
653 print "ok %s" % ref
655 print
657 if peer:
658 parser.repo.push(peer, force=False)
def main(args):
    """Entry point of the remote helper.

    args[1] is the remote alias and args[2] the url. The mode ('hg' or
    'git') is read from the remote-hg.hg-git-compat setting. The global
    state is set up and commands are served from stdin until a blank
    line. Marks are stored at the end, unless the alias was a temporary
    one, in which case the working directory is removed.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode

    alias = args[1]
    url = args[2]
    peer = None

    cmd = ['git', 'config', '--get', 'remote-hg.hg-git-compat']
    try:
        hg_git_compat = subprocess.check_output(cmd) == 'true\n'
    except subprocess.CalledProcessError:
        # git config reported an error: stay in the default mode
        hg_git_compat = False

    mode = 'hg' if hg_git_compat else 'git'

    # the alias minus its first four characters equals the url when no
    # named remote is in use
    is_tmp = alias[4:] == url
    if is_tmp:
        alias = util.sha1(alias).hexdigest()

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks = Marks(os.path.join(dirname, 'marks-hg'))

    handlers = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for word, handler in handlers:
            if parser.check(word):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()

    if is_tmp:
        shutil.rmtree(dirname)
    else:
        marks.store()
# Run the helper only when executed as a script, so that importing the module
# has no side effects.
if __name__ == '__main__':
    sys.exit(main(sys.argv))