remote-hg: fix for files with spaces
[git/mingw.git] / contrib / remote-helpers / git-remote-hg
blob62c39db5b40c43b1098eb55eb689a4fb8b981f38
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial import hg, ui, bookmarks, context, util, encoding
14 import re
15 import sys
16 import os
17 import json
18 import shutil
19 import subprocess
20 import urllib
23 # If you want to switch to hg-git compatibility mode:
24 # git config --global remote-hg.hg-git-compat true
26 # git:
27 # Sensible defaults for git.
28 # hg bookmarks are exported as git branches, hg branches are prefixed
29 # with 'branches/', HEAD is a special case.
31 # hg:
32 # Emulate hg-git.
33 # Only hg bookmarks are exported as git branches.
34 # Commits are modified to preserve hg information and allow bidirectionality.
# Patterns for picking apart author/committer identities.  Written as raw
# strings so the regex escapes (\w, \d) are not read as string escapes.

# Leading run of characters that contains no angle brackets.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name, then '<mail>' and nothing after it.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Name, '<mail', and optionally '>' followed by trailing text (group 3).
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# '<word> [name ]<mail> <digits> <+|-digits>'
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <message>' to stderr and exit with status 1.

    msg  -- message text; treated as a %-format string only when args
            are supplied.
    args -- values interpolated into msg.

    Callers in this file often pass an already formatted message and no
    args.  Such a message is printed verbatim, so a literal '%' in it
    (for example inside an echoed input line) no longer raises a
    formatting error before the real problem is reported.
    """
    text = msg % args if args else msg
    sys.stderr.write('ERROR: %s\n' % text)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <message>' to stderr.

    msg  -- message text; treated as a %-format string only when args
            are supplied.
    args -- values interpolated into msg.

    A message passed without args is printed verbatim, so a literal '%'
    in pre-formatted text (e.g. a branch name) cannot raise a formatting
    error.
    """
    text = msg % args if args else msg
    sys.stderr.write('WARNING: %s\n' % text)
def gitmode(flags):
    """Translate a flags string into the git file mode string.

    'l' in flags wins and yields '120000'; otherwise 'x' yields
    '100755'; anything else yields '100644'.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format a timezone offset in seconds as a git '+HHMM' / '-HHMM' string.

    tz -- offset in seconds; it is negated before formatting, so a
          positive tz produces a '-HHMM' result (presumably Mercurial's
          "seconds west of UTC" convention -- confirm against callers).

    The sign is handled separately from the magnitude.  Dividing the
    negated offset directly rounds towards negative infinity, which
    turned e.g. 12600 (3h30m) into '-0430' instead of '-0330'.
    """
    offset = -tz
    sign = '-' if offset < 0 else '+'
    hours, remainder = divmod(abs(offset), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Translate a git file mode string into the matching flag string.

    Returns 'x' for an executable file, 'l' for a symlink and '' for
    anything else.

    fast-import streams write modes without a leading zero ('100755',
    '120000', or the short '755'), so leading zeros are stripped before
    the lookup.  The previously recognised spellings '0100755' and
    '0120000' therefore keep working.
    """
    flags = {'100755': 'x', '755': 'x', '120000': 'l'}
    return flags.get(mode.lstrip('0'), '')
class Marks:
    """Revision <-> mark bookkeeping persisted as JSON at a given path.

    Attributes:
      path      -- file the state is loaded from and stored to
      tips      -- ref name -> last tip recorded with set_tip()
      marks     -- str(rev) -> mark
      rev_marks -- mark -> rev (filled by load() and new_mark())
      last_mark -- highest mark handed out or recorded so far
    """

    def __init__(self, path):
        """Start empty, then load existing state from path if it exists."""
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read state from self.path; do nothing if the file is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings, so revisions are converted back
        # to integers for the reverse map.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the persistent part of the state as a plain dictionary."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        # The with-block guarantees the data is flushed and the file closed.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for rev (KeyError if there is none)."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for mark (KeyError if there is none)."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for rev and return it.

        Only the rev -> mark direction is recorded here.
        """
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for rev, in both directions."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Tell whether rev already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the tip recorded for branch, or 0 when none is recorded."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Record tip as the tip of branch."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command/stream input on stdin.

    The current line (whitespace-stripped) is kept in self.line; most
    helpers inspect that line without consuming it.
    """

    def __init__(self, repo):
        """Remember repo and read the first input line."""
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin, stripped of surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals separator.

        After each yielded line a new line is read, unless the consumer
        already advanced past the separator in the meantime.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        """Iterate over lines up to the next empty line."""
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line, otherwise
        the <size> characters read from stdin.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_seconds(tz):
        """Convert a '+HHMM' / '-HHMM' string to signed seconds.

        The sign is applied after splitting the magnitude.  Floor
        division and modulo on a negative HHMM value mis-converted
        offsets that are not whole hours ('-0330' became -10200 instead
        of -12600).
        """
        value = int(tz)
        sign = -1 if value < 0 else 1
        hours, minutes = divmod(abs(value), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse an author/committer style line.

        Returns None when the current line does not match RAW_AUTHOR_RE,
        otherwise a tuple (user, timestamp, tz) where tz is the negated
        offset in seconds.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            # An 'ext:(...)' suffix carries URL-quoted text that is
            # appended again after the '<mail>' part below.
            m = re.match(r'^(.+?) ext:\((.+)\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            # The placeholder mail is dropped; only the name is kept.
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_seconds(tz))
def export_file(fc):
    """Print an inline 'M' file-modify command for file context fc.

    Emits the mode/path header, the 'data <length>' line and then the
    file contents.
    """
    contents = fc.data()
    header = "M %s inline %s" % (gitmode(fc.flags()), fc.path())
    print(header)
    print("data %d" % len(contents))
    print(contents)
def get_filechanges(repo, ctx, parent):
    """Compare ctx's manifest with the manifest of repo[parent].

    Returns (changed, removed): changed is the set of files that are new
    or whose flags or manifest entry differ, removed is the set of files
    present only in the parent.
    """
    current = ctx.manifest()
    # Work on a copy: entries are deleted as they are matched, so what is
    # left at the end is exactly the set of removed files.
    leftover = repo[parent].manifest().copy()

    changed = set()
    for path in current:
        if path not in leftover:
            changed.add(path)
            continue
        differs = (current.flags(path) != leftover.flags(path)
                   or current[path] != leftover[path])
        if differs:
            changed.add(path)
        del leftover[path]

    return changed, set(leftover.keys())
def fixup_user_git(user):
    """Split a user string into (name, mail) for git mode.

    Double quotes are removed first.  A full 'name <mail>' match yields
    both parts; otherwise only a name is extracted and mail is None.
    Either element may be None.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split a user string into (name, mail) for hg-git compatibility mode.

    Text that follows the closing '>' is preserved by appending it to the
    name as a URL-quoted ' ext:(...)' suffix.
    """
    def sanitize(text):
        # stole this from hg-git
        trimmed = text.lstrip('< ').rstrip('> ')
        return re.sub('[<>\n]', '?', trimmed)

    match = AUTHOR_HG_RE.match(user)
    if not match:
        name = sanitize(user)
        # Without a '<mail>' part, a string containing '@' doubles as mail.
        mail = name if '@' in user else None
        return (name, mail)

    name = sanitize(match.group(1))
    mail = sanitize(match.group(2))
    trailing = match.group(3)
    if trailing:
        name += ' ext:(' + urllib.quote(trailing) + ')'
    return (name, mail)
def fixup_user(user):
    """Return user normalised to 'name <mail>'.

    The splitting rules depend on the global mode; a missing name or mail
    is replaced by the global bad_name / bad_mail placeholder.
    """
    splitter = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = splitter(user)
    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Return a repository object for url.

    A local url is opened directly.  Otherwise a clone kept under
    <dirname>/clone is used: it is created on first use, or opened and
    updated by pulling from the remote.  In the non-local case the global
    peer is set to the remote peer.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')

    if hg.islocal(url):
        return hg.repository(myui, url)

    local_path = os.path.join(dirname, 'clone')
    if os.path.exists(local_path):
        repo = hg.repository(myui, local_path)
        peer = hg.peer(myui, {}, url)
        repo.pull(peer, heads=None, force=True)
        return repo

    peer, dstpeer = hg.clone(myui, {}, url, local_path, update=False, pull=True)
    return dstpeer.local()
def rev_to_mark(rev):
    """Look up the mark recorded for rev in the global marks store."""
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision recorded for mark in the global marks store."""
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
    """Print fast-import commands for every unmarked revision up to head.

    repo -- repository the revisions are read from
    name -- ref name without its kind prefix
    kind -- ref namespace ('tags', 'bookmarks' or 'branches' in this file)
    head -- changeset context the ref should end up pointing at

    The candidate range runs from the tip recorded for the ref up to
    head.rev(); revisions that already carry a mark are skipped.  After
    the commits, the ref is reset to head's mark and head.rev() is
    recorded as the new tip.
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return
    revs = xrange(tip, head_rev + 1)
    count = 0

    revs = [r for r in revs if not marks.is_marked(r)]

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # Root revision: every file in the manifest is new.
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        if mode == 'hg':
            # Append the '--HG--' trailer that carries branch, rename
            # and extra information inside the commit message.
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.iteritems():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                else:
                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            desc += '\n'
            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (f))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    # Use head explicitly rather than whatever the loop variable last
    # held: the loop may not run at all, and its last revision is not
    # necessarily head when head itself was already marked.
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export the history leading to tag under the 'tags' namespace."""
    target = repo[tag]
    export_ref(repo, tag, 'tags', target)
def export_bookmark(repo, bmark):
    """Export bookmark bmark, using the head recorded in the global bmarks."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export branch under the 'branches' namespace, up to its tip."""
    tip_ctx = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', tip_ctx)
def export_head(repo):
    """Export the ref stored in the global g_head as a bookmark."""
    head_name, head_ctx = g_head[0], g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_ctx)
def do_capabilities(parser):
    """Print the helper's capabilities, terminated by an empty line.

    The '*import-marks' line is only emitted when the marks-git file
    already exists under the global dirname.
    """
    marks_git = os.path.join(dirname, 'marks-git')

    lines = [
        "import",
        "export",
        "refspec refs/heads/branches/*:%s/branches/*" % prefix,
        "refspec refs/heads/*:%s/bookmarks/*" % prefix,
        "refspec refs/tags/*:%s/tags/*" % prefix,
    ]
    if os.path.exists(marks_git):
        lines.append("*import-marks %s" % marks_git)
    lines.append("*export-marks %s" % marks_git)
    lines.append("")

    for entry in lines:
        print(entry)
def get_branch_tip(repo, branch):
    """Return the tip of branch from the global branches map, or None.

    When the branch has several heads a warning is printed and, if the
    repository object offers branchtip(), its answer is used; otherwise
    the first recorded head is returned.
    """
    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    several = len(heads) > 1
    if several:
        warn("Branch '%s' has more than one head, consider merging" % branch)
    # older versions of mercurial don't have this
    if several and hasattr(repo, "branchtip"):
        return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Announce which ref HEAD points at and remember it in g_head.

    The current bookmark is used when there is one.  Otherwise a bookmark
    is faked from the branch name cur ('default' becomes 'master') and
    registered in the global bmarks; nothing is printed if neither '.'
    nor 'tip' gives a usable node.
    """
    global g_head, bmarks

    current = bookmarks.readcurrent(repo)
    if current:
        ref_name = current
        target = repo[current]
    else:
        # fake bookmark from current branch
        target = repo['.'] or repo['tip']
        if not target:
            return
        ref_name = 'master' if cur == 'default' else cur
        bmarks[ref_name] = target

    print("@refs/heads/%s HEAD" % ref_name)
    g_head = (ref_name, target)
def do_list(parser):
    """Print the list of refs, terminated by an empty line.

    Fills the global bmarks (and, when track_branches is set, branches)
    as a side effect.  The 'tip' tag is never listed.
    """
    repo = parser.repo

    for bmark_name, bmark_node in bookmarks.listbookmarks(repo).iteritems():
        bmarks[bmark_name] = repo[bmark_node]

    list_head(repo, repo.dirstate.branch())

    if track_branches:
        for branch_name in repo.branchmap():
            branch_heads = repo.branchheads(branch_name)
            if len(branch_heads):
                branches[branch_name] = branch_heads

        for branch_name in branches:
            print("? refs/heads/branches/%s" % branch_name)

    for bmark_name in bmarks:
        print("? refs/heads/%s" % bmark_name)

    for tag_name, _node in repo.tagslist():
        if tag_name != 'tip':
            print("? refs/tags/%s" % tag_name)

    print("")
def do_import(parser):
    """Handle a batch of consecutive 'import <ref>' commands.

    Prints the feature lines, exports each requested ref with
    encoding.encoding temporarily set to 'utf-8', and finishes the
    stream with 'done'.
    """
    repo = parser.repo

    marks_git = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_git):
        print("feature import-marks=%s" % marks_git)
    print("feature export-marks=%s" % marks_git)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    # Order matters: the branches prefix must be tried before the more
    # general heads prefix.
    exporters = (
        ('refs/heads/branches/', export_branch),
        ('refs/heads/', export_bookmark),
        ('refs/tags/', export_tag),
    )

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if ref == 'HEAD':
            export_head(repo)
        else:
            for ref_prefix, exporter in exporters:
                if ref.startswith(ref_prefix):
                    exporter(repo, ref[len(ref_prefix):])
                    break

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Consume a blob command and store its data in blob_marks by mark."""
    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add to files the entries touched by repo[p1] that are still present.

    Every file listed by repo[p1].files() that is not already in files
    and that exists in repo[p1]'s manifest is added as {'ctx': <file>}.
    files is modified in place; p2 is not used.
    """
    parent_ctx = repo[p1]
    for path in parent_ctx.files():
        if path in files:
            continue
        if path not in parent_ctx.manifest():
            continue
        files[path] = { 'ctx' : parent_ctx[path] }
def parse_commit(parser):
    """Consume one commit command and create the matching changeset.

    Reads, in order: the ref, the commit mark, author, committer, the
    message, optional 'from' and 'merge' marks, and the file commands up
    to the next empty line.  The new node is recorded in parsed_refs
    under the ref and its revision is recorded in marks under the commit
    mark.  Octopus merges and unknown file commands abort via die().
    """
    global marks, blob_marks, bmarks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            # The path is the last field and may contain spaces.
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # Split once only, so deleted paths with spaces stay intact.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx: supplies the content of file f.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        # Decode the '--HG--' trailer back into rename/branch/extra data
        # and strip it from the message.
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            for entry in tmp.split('\n'):
                # Lines without a 'key : value' shape carry nothing.
                if ' : ' not in entry:
                    continue
                # Split on the first separator only: 'extra' lines have
                # the form 'extra : key : value'.
                k, v = entry.split(' : ', 1)
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node

    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a reset command.

    A reset followed directly by a commit is handed to parse_commit().
    A reset with a 'from' line records the referenced node in
    parsed_refs under the ref; any other reset is ignored.
    """
    ref = parser[1]
    parser.next()

    if parser.check('commit'):
        # ugh
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
        changelog = parser.repo.changelog
        parsed_refs[ref] = changelog.node(mark_to_rev(from_mark))
def parse_tag(parser):
    """Consume a tag command and discard it.

    Each element is still read so that the input, including the data
    payload on stdin, is advanced past the command.
    """
    parser[1]            # tag name
    parser.next()
    parser.get_mark()    # mark the tag points at
    parser.next()
    parser.get_author()  # tagger
    parser.next()
    parser.get_data()    # tag message
    parser.next()

    # nothing to do
def do_export(parser):
    """Handle an 'export' command.

    Consumes the incoming stream up to 'done', dispatching each command
    to its parser, then updates bookmarks and tags for the refs that were
    parsed and prints 'ok <ref>' for each one handled.  If a remote peer
    is set, the result is pushed to it afterwards.
    """
    parser.next()

    handlers = (
        ('blob', parse_blob),
        ('commit', parse_commit),
        ('reset', parse_reset),
        ('tag', parse_tag),
    )

    for line in parser.each_block('done'):
        for keyword, handler in handlers:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            # 'feature' lines are accepted and ignored.
            if not parser.check('feature'):
                die('unhandled export command: %s' % line)

    for ref, node in parsed_refs.iteritems():
        if ref.startswith('refs/heads/branches'):
            pass
        elif ref.startswith('refs/heads/'):
            bmark = ref[len('refs/heads/'):]
            old = bmarks[bmark].hex() if bmark in bmarks else ''
            if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
                continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            parser.repo.tag([tag], node, None, True, None, {})
        else:
            # transport-helper/fast-export bugs
            continue
        print("ok %s" % ref)

    print("")

    if peer:
        parser.repo.push(peer, force=False)
def _get_git_config(key):
    """Return the output of `git config --get <key>`, or '' if it fails."""
    try:
        return subprocess.check_output(['git', 'config', '--get', key])
    except subprocess.CalledProcessError:
        # git exits non-zero when the key is not set.
        return ''

def main(args):
    """Run the remote helper: args[1] is the remote alias, args[2] the url.

    Sets up the global state, opens the repository and then serves the
    'capabilities', 'list', 'import' and 'export' commands read from
    stdin until an empty line.  For a named remote the marks are stored
    at the end; for an anonymous one the working directory is removed.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches

    alias = args[1]
    url = args[2]
    peer = None

    # Each key is queried on its own, so an unset hg-git-compat no
    # longer prevents track-branches from being read.
    hg_git_compat = _get_git_config('remote-hg.hg-git-compat') == 'true\n'
    track_branches = not hg_git_compat
    if _get_git_config('remote-hg.track-branches') == 'false\n':
        track_branches = False

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # An alias of the form '<4 chars><url>' means no named remote is in
    # use; a hash of the alias then names a throw-away directory.
    if alias[4:] == url:
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            die('unhandled command: %s', line)
        sys.stdout.flush()

    if not is_tmp:
        marks.store()
    else:
        shutil.rmtree(dirname)

# Only run when executed as a script, so the module can be imported.
if __name__ == '__main__':
    sys.exit(main(sys.argv))