remote-hg: fix for files with spaces
[alt-git.git] / contrib / remote-helpers / git-remote-hg
blob c6a1a47cc8ec29badfc630a1140ca1cdaafed6f9
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial import hg, ui, bookmarks, context, util, encoding
14 import re
15 import sys
16 import os
17 import json
18 import shutil
19 import subprocess
20 import urllib
23 # If you want to switch to hg-git compatibility mode:
24 # git config --global remote-hg.hg-git-compat true
26 # If you are not in hg-git-compat mode and want to disable the tracking of
27 # named branches:
28 # git config --global remote-hg.track-branches false
30 # git:
31 # Sensible defaults for git.
32 # hg bookmarks are exported as git branches, hg branches are prefixed
33 # with 'branches/', HEAD is a special case.
35 # hg:
36 # Emulate hg-git.
37 # Only hg bookmarks are exported as git branches.
38 # Commits are modified to preserve hg information and allow bidirectionality.
# All patterns are raw strings so that regex escapes such as \w, \d and \(
# reach the regex engine verbatim instead of relying on Python leaving
# unrecognised string escapes untouched.

# Leading run of characters containing neither '<' nor '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# "name <mail>" where the name is optional; group 1 = name, group 2 = mail.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Lenient variant: the closing '>' is optional and group 3 captures any text
# that follows it.
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# "<word> [name ]<mail> <digits> <signed digits>";
# groups: word, name (optional), mail, digits, signed digits.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write ``ERROR: <msg>`` to stderr and exit the process with status 1.

    ``msg`` is %-formatted with ``args`` only when ``args`` are supplied, so
    a caller that passes an already formatted message containing a literal
    '%' (for example a file name) gets the message reported instead of a
    formatting exception.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % (msg,))
    sys.exit(1)
def warn(msg, *args):
    """Write ``WARNING: <msg>`` to stderr.

    ``msg`` is %-formatted with ``args`` only when ``args`` are supplied, so
    an already formatted message containing a literal '%' is emitted as-is
    instead of raising a formatting exception.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % (msg,))
def gitmode(flags):
    """Map hg file flags to a git mode string.

    'l' in ``flags`` wins ('120000'), then 'x' ('100755'); anything else
    yields '100644'.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format an offset in seconds as a ``+HHMM`` / ``-HHMM`` string.

    The sign of the result is the opposite of the sign of ``tz`` (a negative
    ``tz`` yields a '+' offset); zero yields '+0000'.

    The sign is handled separately from the magnitude: flooring division and
    modulo on a negative value would otherwise round the hour away from zero
    and complement the minutes (e.g. 19800 would render as '-0630' rather
    than '-0530').
    """
    sign = '-' if tz > 0 else '+'
    hours, remainder = divmod(abs(int(tz)), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Map a git mode string to an hg flag: 'x', 'l', or '' for anything else."""
    flag_by_mode = {'100755': 'x', '120000': 'l'}
    try:
        return flag_by_mode[mode]
    except KeyError:
        return ''
def get_config(config):
    """Run ``git config --get <config>`` and return its raw standard output.

    The output is returned exactly as captured (not stripped).
    """
    proc = subprocess.Popen(['git', 'config', '--get', config],
                            stdout=subprocess.PIPE)
    captured = proc.communicate()
    return captured[0]
class Marks:
    """Persistent mapping between revisions and marks, plus per-ref tips.

    State is kept in a JSON file at ``path`` with three keys:
    'tips' (ref name -> tip), 'marks' (str(rev) -> mark) and 'last-mark'.
    ``rev_marks`` is the in-memory reverse mapping (mark -> rev).
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Load state from ``self.path``; do nothing if the file is absent."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # Rebuild the reverse mapping; JSON keys are strings, revs are ints.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serialisable state as a plain dictionary."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to ``self.path`` as JSON."""
        # The context manager guarantees the data is flushed and the file
        # closed before returning.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for ``rev`` (KeyError if there is none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for ``mark`` (KeyError if unknown)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark number, record it for ``rev`` and return it.

        Only the rev -> mark mapping is updated here.
        """
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen ``mark`` for ``rev`` in both mappings."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if ``rev`` already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the stored tip for ``branch``, or 0 when none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Store ``tip`` as the tip for ``branch``."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    ``self.line`` always holds the current (whitespace-stripped) line; the
    first line is read on construction.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and return it stripped of whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated token of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield successive lines until the current line equals ``separator``."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating the parser yields lines up to the next empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a ``data <size>`` line.

        Returns None when the current line does not start with 'data';
        otherwise reads and returns ``size`` units from stdin.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse the current line with RAW_AUTHOR_RE.

        Returns None when the line does not match, otherwise a tuple
        ``(user, date, tz)`` where ``date`` is an int and ``tz`` is the
        negated offset in seconds (so '+0100' gives -3600).
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # Undo the ' ext:(...)' suffix: split it off the name and
            # URL-unquote the payload so it can be re-appended below.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder mail is dropped; only the name is kept.
            user = name

        if ex:
            user += ex

        # Convert +-HHMM to seconds. Sign and magnitude are handled
        # separately: flooring division/modulo on a negative HHMM value
        # would turn e.g. -0530 into -4h50m.
        offset = int(tz)
        sign = -1 if offset < 0 else 1
        hours, minutes = divmod(abs(offset), 100)
        seconds = sign * (hours * 3600 + minutes * 60)
        return (user, int(date), -seconds)
def export_file(fc):
    """Print an inline 'M' entry for ``fc`` followed by its 'data' block.

    ``fc`` must provide ``data()``, ``flags()`` and ``path()``.
    """
    contents = fc.data()
    header = "M %s inline %s" % (gitmode(fc.flags()), fc.path())
    print(header)
    print("data %d" % len(contents))
    print(contents)
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of ``ctx`` with that of ``repo[parent]``.

    Returns ``(changed, removed)``: ``changed`` is the set of files that are
    new in ``ctx`` or whose flags or manifest entry differ from the parent;
    ``removed`` is the set of files present only in the parent.
    """
    current = ctx.manifest()
    # Work on a copy: entries are deleted as they are matched, so whatever
    # is left at the end exists only in the parent.
    leftover = repo[parent].manifest().copy()
    changed = set()

    for name in current:
        if name not in leftover:
            changed.add(name)
            continue
        if current.flags(name) != leftover.flags(name) or current[name] != leftover[name]:
            changed.add(name)
        del leftover[name]

    return changed, set(leftover.keys())
def fixup_user_git(user):
    """Split ``user`` into ``(name, mail)`` using the strict patterns.

    Double quotes are removed first. If the whole string matches AUTHOR_RE
    the name is returned as matched and the mail stripped; otherwise NAME_RE
    may supply a stripped name with mail None. Returns ``(None, None)``
    when neither pattern matches.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split ``user`` into ``(name, mail)`` using the lenient AUTHOR_HG_RE.

    Text found after the closing '>' is URL-quoted and appended to the name
    as `` ext:(...)``. When the pattern does not match, the sanitized input
    is the name, and also the mail if the input contains '@' (else None).
    """
    def scrub(text):
        # stole this from hg-git
        trimmed = text.lstrip('< ').rstrip('> ')
        return re.sub('[<>\n]', '?', trimmed)

    found = AUTHOR_HG_RE.match(user)
    if not found:
        name = scrub(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = scrub(found.group(1))
    mail = scrub(found.group(2))
    trailer = found.group(3)
    if trailer:
        name += ' ext:(' + urllib.quote(trailer) + ')'

    return (name, mail)
def fixup_user(user):
    """Return ``user`` normalised to ``'name <mail>'``.

    The splitter is chosen by the global ``mode`` ('git' selects
    fixup_user_git, anything else fixup_user_hg); an empty or missing name
    or mail is replaced by the global ``bad_name`` / ``bad_mail``.
    """
    global mode, bad_mail

    split = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = split(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Return a local hg repository object for ``url``.

    A local ``url`` is opened directly. Otherwise a clone under
    ``<dirname>/clone`` is used: it is created on first use, or opened and
    pulled into when it already exists. In the non-local case the global
    ``peer`` is set to the remote peer.
    """
    global dirname, peer

    hgui = ui.ui()
    hgui.setconfig('ui', 'interactive', 'off')

    if hg.islocal(url):
        return hg.repository(hgui, url)

    clone_dir = os.path.join(dirname, 'clone')
    if os.path.exists(clone_dir):
        # Existing clone: open it and refresh it from the remote.
        repo = hg.repository(hgui, clone_dir)
        peer = hg.peer(hgui, {}, url)
        repo.pull(peer, heads=None, force=True)
        return repo

    # First contact: clone without updating a working directory.
    peer, dstpeer = hg.clone(hgui, {}, url, clone_dir, update=False, pull=True)
    return dstpeer.local()
def rev_to_mark(rev):
    """Return the mark that the global ``marks`` table holds for ``rev``."""
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Return the revision that the global ``marks`` table holds for ``mark``."""
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
    """Print the commits needed to bring ref ``<kind>/<name>`` up to ``head``.

    Every not-yet-marked revision in the numeric range from the stored tip
    of the ref up to ``head.rev()`` is printed as a 'commit' block (author,
    committer, message, parents, file changes). Afterwards the ref is reset
    to the mark of ``head`` and the stored tip is updated.

    Returns early, printing nothing, when the stored tip already equals the
    head revision.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return

    revs = [r for r in xrange(tip, head_rev + 1) if not marks.is_marked(r)]
    count = 0

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            # Stored as "<user> <time> <tz>"; split from the right because
            # the user part may itself contain spaces.
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if parents:
            modified, removed = get_filechanges(repo, c, parents[0])
        else:
            # Root commit: every file in the manifest is new.
            modified = c.manifest().keys()
            removed = []

        if mode == 'hg':
            # Append hg-only metadata to the message after a '--HG--' line.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.iteritems():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            desc += '\n'
            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if not parents and rev:
            # A parentless commit other than revision 0 starts a new history.
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (f))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    # Always point the ref (and the stored tip) at the head itself rather
    # than at whatever revision the loop visited last: the last unmarked
    # revision in the range is not necessarily the head, and the loop
    # variable does not exist at all when nothing needed exporting.
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export ``tag`` as a ref of kind 'tags', with ``repo[tag]`` as head."""
    head = repo[tag]
    export_ref(repo, tag, 'tags', head)
def export_bookmark(repo, bmark):
    """Export ``bmark`` as a ref of kind 'bookmarks'.

    The head is looked up in the global ``bmarks`` table.
    """
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export ``branch`` as a ref of kind 'branches'.

    The head is ``repo[tip]`` where ``tip`` comes from get_branch_tip().
    """
    branch_tip = get_branch_tip(repo, branch)
    export_ref(repo, branch, 'branches', repo[branch_tip])
def export_head(repo):
    """Export the ``(name, node)`` pair stored in ``g_head`` as a bookmark ref."""
    global g_head
    head_name = g_head[0]
    head_node = g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
    """Print the capability lines, terminated by an empty line.

    Announces 'import', 'export', three refspecs under the global
    ``prefix``, and the marks file ``<dirname>/marks-git`` ('*import-marks'
    only when that file already exists, '*export-marks' always).
    """
    global prefix, dirname

    print("import")
    print("export")
    for refspec in ("refspec refs/heads/branches/*:%s/branches/*",
                    "refspec refs/heads/*:%s/bookmarks/*",
                    "refspec refs/tags/*:%s/tags/*"):
        print(refspec % prefix)

    marks_file = os.path.join(dirname, 'marks-git')

    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print("")
def get_branch_tip(repo, branch):
    """Return the head to use for ``branch``, or None if it has no heads.

    Heads come from the global ``branches`` table. With several heads a
    warning is emitted and ``repo.branchtip(branch)`` is preferred when the
    repository object offers it; otherwise the first head is returned.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    if len(heads) <= 1:
        return heads[0]

    # more than one head
    warn("Branch '%s' has more than one head, consider merging" % branch)
    # older versions of mercurial don't have this
    if hasattr(repo, "branchtip"):
        return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Print the symbolic HEAD line and remember it in ``g_head``.

    Uses the current bookmark when there is one. Otherwise a bookmark is
    faked from the branch name ``cur`` ('default' becomes 'master'),
    pointing at ``repo['.']`` or, failing that, ``repo['tip']``; if neither
    is usable nothing is printed and ``g_head`` is left untouched.
    """
    global g_head, bmarks

    head = bookmarks.readcurrent(repo)
    if head:
        node = repo[head]
    else:
        # fake bookmark from current branch
        head = cur
        node = repo['.'] or repo['tip']
        if not node:
            return
        if head == 'default':
            head = 'master'
        bmarks[head] = node

    print("@refs/heads/%s HEAD" % head)
    g_head = (head, node)
def do_list(parser):
    """Print the list of refs, terminated by an empty line.

    Fills the global ``bmarks`` table from the repository bookmarks, prints
    the HEAD line via list_head(), then one '? <ref>' line per named branch
    (only when ``track_branches`` is set), per bookmark and per tag except
    'tip'. The global ``branches`` table is filled as a side effect.
    """
    global branches, bmarks, mode, track_branches

    repo = parser.repo
    for bmark, node in bookmarks.listbookmarks(repo).iteritems():
        bmarks[bmark] = repo[node]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        for branch in repo.branchmap():
            branch_heads = repo.branchheads(branch)
            if len(branch_heads):
                branches[branch] = branch_heads

        for branch in branches:
            print("? refs/heads/branches/%s" % branch)

    for bmark in bmarks:
        print("? refs/heads/%s" % bmark)

    for tag, node in repo.tagslist():
        if tag != 'tip':
            print("? refs/tags/%s" % tag)

    print("")
def do_import(parser):
    """Handle a run of consecutive 'import <ref>' lines.

    Prints the 'feature' preamble, exports each requested ref with the
    exporter matching its name, and finishes with 'done'. While exporting,
    ``encoding.encoding`` is temporarily set to 'utf-8'.
    """
    repo = parser.repo

    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    # Order matters: the branches prefix must be tried before the more
    # general heads prefix.
    exporters = (
        ('refs/heads/branches/', export_branch),
        ('refs/heads/', export_bookmark),
        ('refs/tags/', export_tag),
    )

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if ref == 'HEAD':
            export_head(repo)
        else:
            for ref_prefix, exporter in exporters:
                if ref.startswith(ref_prefix):
                    exporter(repo, ref[len(ref_prefix):])
                    break

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Consume a blob (mark line, data line, payload) from the stream.

    The payload is stored in the global ``blob_marks`` under its mark.
    """
    global blob_marks

    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add to ``files`` the files touched by ``repo[p1]`` that it lacks.

    For each path reported by ``repo[p1].files()`` that is not already a key
    of ``files`` and is present in that revision's manifest, an entry
    ``{'ctx': repo[p1][path]}`` is added. ``files`` is modified in place;
    ``p2`` is not used.
    """
    for path in repo[p1].files():
        if path in files:
            continue
        if path not in repo[p1].manifest():
            continue
        files[path] = {'ctx': repo[p1][path]}
def parse_commit(parser):
    """Consume one 'commit' block from the stream and commit it to the repo.

    Reads the mark, author, committer, message, optional 'from' / 'merge'
    parents and the 'M' / 'D' file commands, builds an in-memory changeset
    and commits it. The resulting node is recorded in ``parsed_refs`` under
    the commit's ref and its revision in ``marks`` under the commit mark.
    A second 'merge' line aborts via die().
    """
    global marks, blob_marks, parsed_refs
    global mode

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()

    from_mark = None
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

    merge_mark = None
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    # path -> description of what happens to that file in this commit
    files = {}

    for line in parser:
        if parser.check('M'):
            # Split at most three times so the path keeps any spaces.
            _, git_mode, mark_ref, path = line.split(' ', 3)
            blob_mark = int(mark_ref[1:])
            entry = {'mode': hgmode(git_mode), 'data': blob_marks[blob_mark]}
        elif parser.check('D'):
            _, path = line.split(' ', 1)
            entry = {'deleted': True}
        else:
            die('Unknown file command: %s' % line)
        files[path] = entry

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx: IOError signals a removed file.
        info = files[f]
        if 'deleted' in info:
            raise IOError
        if 'ctx' in info:
            return info['ctx']
        flag = info['mode']
        is_exec = flag == 'x'
        is_link = flag == 'l'
        renamed_from = info.get('rename', None)
        return context.memfilectx(f, info['data'],
                                  is_link, is_exec, renamed_from)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    null_node = '\0' * 20
    p1 = repo.changelog.node(mark_to_rev(from_mark)) if from_mark else null_node
    p2 = repo.changelog.node(mark_to_rev(merge_mark)) if merge_mark else null_node

    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        marker = '\n--HG--\n'
        i = data.find(marker)
        if i >= 0:
            # Lines after the marker are "key : value" metadata.
            trailer = data[i + len(marker):].strip()
            for item in trailer.split('\n'):
                k, v = item.split(' : ', 1)
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
                         files.keys(), getfilectx,
                         user, (date, tz), extra)

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = saved_encoding

    rev = repo[node].rev()

    parsed_refs[ref] = node
    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a 'reset <ref>' command.

    If a 'commit' line follows, it is handed to parse_commit(). If a 'from'
    line follows, the node for that mark is recorded in ``parsed_refs``
    under the ref. Anything else leaves the next line unconsumed.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = parser.repo.changelog.node(mark_to_rev(from_mark))
def parse_tag(parser):
    """Consume a 'tag' block from the stream and discard its contents.

    The name, mark, tagger and message are read only to advance the parser.
    """
    parser[1]  # tag name; read but not used
    for read_field in (parser.get_mark, parser.get_author, parser.get_data):
        parser.next()
        read_field()
    parser.next()

    # nothing to do
def do_export(parser):
    """Handle the 'export' command.

    Dispatches each command of the incoming stream (blob, commit, reset,
    tag; 'feature' lines are ignored) up to 'done', then applies the
    collected refs: bookmarks are pushed, tags are created, branch refs are
    acknowledged only. Prints 'ok <ref>' or 'error <ref>' per ref, pushes to
    ``peer`` when one is set and ends with an empty line.
    """
    global parsed_refs, bmarks, peer

    parser.next()

    handlers = (
        ('blob', parse_blob),
        ('commit', parse_commit),
        ('reset', parse_reset),
        ('tag', parse_tag),
    )

    for line in parser.each_block('done'):
        for keyword, handler in handlers:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            if not parser.check('feature'):
                die('unhandled export command: %s' % line)

    for ref, node in parsed_refs.iteritems():
        if ref.startswith('refs/heads/branches'):
            pass
        elif ref.startswith('refs/heads/'):
            bmark = ref[len('refs/heads/'):]
            old = bmarks[bmark].hex() if bmark in bmarks else ''
            if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
                print("error %s" % ref)
                continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            parser.repo.tag([tag], node, None, True, None, {})
        else:
            # transport-helper/fast-export bugs
            continue
        print("ok %s" % ref)

    if peer:
        parser.repo.push(peer, force=False)

    print("")
def fix_path(alias, repo, orig_url):
    """Rewrite ``remote.<alias>.url`` when it differs from the repo's URL.

    Both URLs are passed through ``util.url`` and compared as strings; on a
    mismatch ``git config`` is invoked to store ``hg::<repo url>``.
    """
    canonical = util.url(repo.url())
    given = util.url(orig_url)
    if str(given) != str(canonical):
        subprocess.call(['git', 'config', 'remote.%s.url' % alias, "hg::%s" % canonical])
def main(args):
    """Entry point: ``args`` is ``[program, alias, url]``.

    Reads the remote-hg git configuration, initialises the module-level
    state, opens the repository and then serves commands read from stdin
    (capabilities, list, import, export) until an empty line. Marks are
    stored at the end, unless the remote is a temporary one (alias derived
    from the URL), in which case its directory is removed instead.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches

    alias = args[1]
    url = args[2]
    peer = None

    hg_git_compat = False
    track_branches = True
    try:
        if get_config('remote-hg.hg-git-compat') == 'true\n':
            hg_git_compat = True
            track_branches = False
        if get_config('remote-hg.track-branches') == 'false\n':
            track_branches = False
    except subprocess.CalledProcessError:
        pass

    if hg_git_compat:
        mode, bad_mail, bad_name = 'hg', 'none@none', ''
    else:
        mode, bad_mail, bad_name = 'git', 'unknown', 'Unknown'

    # An alias that is just the URL behind a four-character prefix means no
    # named remote is configured: use a hash as a throw-away directory name.
    is_tmp = alias[4:] == url
    if is_tmp:
        alias = util.sha1(alias).hexdigest()

    dirname = os.path.join(os.environ['GIT_DIR'], 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not is_tmp:
        fix_path(alias, peer or repo, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks = Marks(os.path.join(dirname, 'marks-hg'))

    commands = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for keyword, handler in commands:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()

    if is_tmp:
        shutil.rmtree(dirname)
    else:
        marks.store()
# Run only when executed as a script, so that loading this file as a module
# does not start the helper.
if __name__ == '__main__':
    sys.exit(main(sys.argv))