remote-hg: the author email can be null
[git/mingw.git] / contrib / remote-helpers / git-remote-hg
blob7929eec30b39280985a9d2b7147699d99e211817
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial import hg, ui, bookmarks, context, util, encoding
14 import re
15 import sys
16 import os
17 import json
18 import shutil
19 import subprocess
20 import urllib
23 # If you want to switch to hg-git compatibility mode:
24 # git config --global remote-hg.hg-git-compat true
26 # git:
27 # Sensible defaults for git.
28 # hg bookmarks are exported as git branches, hg branches are prefixed
29 # with 'branches/', HEAD is a special case.
31 # hg:
32 # Emulate hg-git.
33 # Only hg bookmarks are exported as git branches.
34 # Commits are modified to preserve hg information and allow bidirectionality.
# Leading run of characters that contains neither '<' nor '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# "Name <email>" where the name is optional; groups are (name, email).
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Looser variant used for hg user strings; groups are
# (name, email, text following the closing '>').
AUTHOR_HG_RE = re.compile(r'^(.*?) ?<(.*?)(?:>(.+)?)?$')
# Ident line of a fast-import stream; groups are
# (command word, name, email, timestamp, timezone).
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <msg>' to stderr and exit with status 1.

    msg is %-formatted with args only when args are given, so a message
    that was already formatted by the caller (and may contain a literal
    '%', e.g. from a file name) is printed verbatim instead of raising.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <msg>' to stderr.

    msg is %-formatted with args only when args are given, so a message
    that was already formatted by the caller (and may contain a literal
    '%', e.g. from a branch name) is printed verbatim instead of raising.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gitmode(flags):
    """Map a flags string to a git file mode string.

    'l' in flags wins and gives '120000'; otherwise 'x' gives '100755';
    anything else gives '100644'.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format an offset in seconds as a git '+HHMM' / '-HHMM' timezone.

    The sign is inverted: a positive tz yields a '-' offset and a negative
    tz a '+' offset (presumably because hg stores seconds west of UTC --
    confirm against the Mercurial docs).

    The sign is computed separately from the magnitude so that offsets that
    are not whole hours (e.g. 12600 -> '-0330') are not skewed by floor
    division of a negative number.
    """
    sign = '-' if tz > 0 else '+'
    hours, remainder = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
def hgmode(mode):
    """Map a git file mode string to an hg flag ('x', 'l' or '').

    fast-import streams spell modes as six octal digits ('100755',
    '120000') and also allow the short form '755'; the zero-prefixed
    spellings are kept for backward compatibility. Any other mode maps
    to '' (regular, non-executable file).
    """
    m = {
        '100755': 'x', '755': 'x', '0100755': 'x',
        '120000': 'l', '0120000': 'l',
    }
    return m.get(mode, '')
class Marks:
    """Mapping between revision numbers and fast-import marks, stored as JSON.

    marks maps str(rev) -> mark, rev_marks maps mark -> rev, tips maps a
    ref name to the last revision recorded for it, and last_mark is the
    highest mark handed out so far.
    """

    def __init__(self, path):
        """Create the mapping and load previous state from path if it exists."""
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Read tips, marks and last-mark from self.path; no-op if missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings; the reverse map stores integer revs.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the state that gets serialized by store()."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the current state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for rev; raises KeyError if unmarked."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision recorded for mark; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for rev and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse map in sync, as new_mark() and load() do, so
        # to_rev() works for marks allocated during this session.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for rev and make it the last mark."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        """Return True if rev already has a mark."""
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the tip recorded for branch, or 0 when none is recorded."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        """Record tip as the last revision seen for branch."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command / fast-import stream on stdin.

    self.line always holds the current (stripped) line; the helper methods
    inspect or advance it.
    """

    def __init__(self, repo):
        """Remember repo and read the first line from stdin."""
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace stripped."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield the current line, then advance, until it equals separator."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # A block ends at an empty line (which is also what EOF reads as).
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a 'data' line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a '+HHMM' / '-HHMM' offset string to signed seconds.

        Sign and magnitude are handled separately: floor division and
        modulo on a negative HHMM value would turn '-0330' into -2h50m.
        """
        value = int(tz)
        sign = -1 if value < 0 else 1
        hours, minutes = divmod(abs(value), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current ident line into (user, timestamp, tz).

        Returns None when the line does not match RAW_AUTHOR_RE. user is
        'name <email>', '<email>' when there is no name, or just the name
        when the email equals the bad_mail placeholder. An ' ext:(...)'
        suffix on the name is unquoted and appended to user. The returned
        tz is the negated offset in seconds.
        """
        global bad_mail

        ex = None
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name and 'ext:' in name:
            m = re.match('^(.+?) ext:\\((.+)\\)$', name)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))

        if email != bad_mail:
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            user = name

        if ex:
            user += ex

        return (user, int(date), -self._tz_to_seconds(tz))
def export_file(fc):
    """Print an inline 'M' (filemodify) command plus data for file context fc."""
    payload = fc.data()
    filemode = gitmode(fc.flags())
    print("M %s inline %s" % (filemode, fc.path()))
    print("data %d" % len(payload))
    print(payload)
def get_filechanges(repo, ctx, parent):
    """Compare ctx's manifest with the manifest of repo[parent].

    Returns (changed, removed): changed is the set of paths that are new
    in ctx or whose flags or manifest entry differ from the parent;
    removed is the set of paths present only in the parent.
    """
    current = ctx.manifest()
    previous = repo[parent].manifest()

    changed = set()
    for path in current:
        if path not in previous:
            # added
            changed.add(path)
        elif (current.flags(path) != previous.flags(path)
              or current[path] != previous[path]):
            # modified
            changed.add(path)

    removed = set(path for path in previous if path not in current)
    return changed, removed
def fixup_user_git(user):
    """Split a user string into (name, mail) for the 'git' mode.

    Double quotes are dropped first. When the string matches AUTHOR_RE
    both parts come from it (the name may be None); otherwise only a
    name is extracted via NAME_RE and mail is None. Returns (None, None)
    when neither pattern matches.
    """
    cleaned = user.replace('"', '')

    full = AUTHOR_RE.match(cleaned)
    if full:
        return (full.group(1), full.group(2).strip())

    partial = NAME_RE.match(cleaned)
    if partial:
        return (partial.group(1).strip(), None)

    return (None, None)
def fixup_user_hg(user):
    """Split a user string into (name, mail) for the 'hg' mode.

    When AUTHOR_HG_RE matches, any text after the closing '>' is URL-quoted
    and appended to the name as ' ext:(...)'. Otherwise the whole sanitized
    string is the name, and also the mail if the input contains '@'.
    """
    def scrub(text):
        # stole this from hg-git
        return re.sub('[<>\n]', '?', text.lstrip('< ').rstrip('> '))

    match = AUTHOR_HG_RE.match(user)
    if not match:
        name = scrub(user)
        mail = name if '@' in user else None
        return (name, mail)

    name = scrub(match.group(1))
    mail = scrub(match.group(2))
    trailer = match.group(3)
    if trailer:
        name += ' ext:(' + urllib.quote(trailer) + ')'
    return (name, mail)
def fixup_user(user):
    """Return 'name <mail>' for user, normalized for the current mode.

    Missing parts are replaced with the bad_name / bad_mail placeholders.
    """
    global mode, bad_mail

    fixer = fixup_user_git if mode == 'git' else fixup_user_hg
    name, mail = fixer(user)

    return '%s <%s>' % (name or bad_name, mail or bad_mail)
def get_repo(url, alias):
    """Return a local hg repository object for url.

    A local url is opened directly. Otherwise a clone under
    <dirname>/clone is created on first use, or opened and refreshed with
    a forced pull on later runs; the global peer is set in both of the
    non-local cases. alias is accepted but not used here.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')

    if hg.islocal(url):
        return hg.repository(myui, url)

    local_path = os.path.join(dirname, 'clone')
    if os.path.exists(local_path):
        repo = hg.repository(myui, local_path)
        peer = hg.peer(myui, {}, url)
        repo.pull(peer, heads=None, force=True)
    else:
        peer, dstpeer = hg.clone(myui, {}, url, local_path, update=False, pull=True)
        repo = dstpeer.local()

    return repo
def rev_to_mark(rev):
    """Look up the mark recorded for rev in the global marks table."""
    global marks
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision recorded for mark in the global marks table."""
    global marks
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
    """Print fast-import commands for every unmarked revision up to head.

    The ref is written as <prefix>/<kind>/<name>. Revisions in the numeric
    range tip:head that have no mark yet are emitted as commits; afterwards
    the ref is reset to head's mark and head's revision is stored as the
    new tip for this ref.

    repo -- hg repository object
    name -- ref name without the kind prefix
    kind -- 'branches', 'bookmarks' or 'tags'
    head -- change context the ref should point to
    """
    global prefix, marks, mode

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)
    head_rev = head.rev()

    # mercurial takes too much time checking this
    if tip and tip == head_rev:
        # nothing to do
        return
    revs = repo.revs('%u:%u' % (tip, head_rev))
    count = 0

    revs = [r for r in revs if not marks.is_marked(r)]

    for rev in revs:

        c = repo[rev]
        (manifest, user, (time, tz), files, desc, extra) = repo.changelog.read(c.node())
        rev_branch = extra['branch']

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            # stored as "<user> <time> <tz>" by the export direction
            c_user, c_time, c_tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (c_user, c_time, gittz(int(c_tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            # root commit: every file in the manifest is new
            modified = c.manifest().keys()
            removed = []
        else:
            modified, removed = get_filechanges(repo, c, parents[0])

        if mode == 'hg':
            # hg-git style: append hg-only metadata to the commit message
            extra_msg = ''

            if rev_branch != 'default':
                extra_msg += 'branch : %s\n' % rev_branch

            renames = []
            for f in c.files():
                if f not in c.manifest():
                    continue
                rename = c.filectx(f).renamed()
                if rename:
                    renames.append((rename[0], f))

            for e in renames:
                extra_msg += "rename : %s => %s\n" % e

            for key, value in extra.iteritems():
                if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                    continue
                else:
                    extra_msg += "extra : %s : %s\n" % (key, urllib.quote(value))

            desc += '\n'
            if extra_msg:
                desc += '\n--HG--\n' + extra_msg

        if len(parents) == 0 and rev:
            # a parentless commit other than rev 0 must not inherit the
            # ref's current history
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (f))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # make sure the ref is updated
    # Point the ref at head itself rather than at whatever revision the
    # loop happened to end on: the last unmarked revision is not head when
    # head was already marked, and the loop variable is unset when no
    # revision needed exporting.
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(head_rev))
    print("")

    marks.set_tip(ename, head_rev)
def export_tag(repo, tag):
    """Export the revision that tag resolves to under the 'tags' kind."""
    target = repo[tag]
    export_ref(repo, tag, 'tags', target)
def export_bookmark(repo, bmark):
    """Export the change context recorded in bmarks for bookmark bmark."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export the tip of hg branch `branch` under the 'branches' kind."""
    head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', head)
def export_head(repo):
    """Export the (name, node) pair stored in g_head as a bookmark."""
    global g_head
    head_name = g_head[0]
    head_node = g_head[1]
    export_ref(repo, head_name, 'bookmarks', head_node)
def do_capabilities(parser):
    """Answer the 'capabilities' command.

    Prints the import/export capabilities, the three refspecs, and the
    marks-file options, followed by a blank line.
    """
    global prefix, dirname

    print("import")
    print("export")

    refspecs = (
        "refspec refs/heads/branches/*:%s/branches/*",
        "refspec refs/heads/*:%s/bookmarks/*",
        "refspec refs/tags/*:%s/tags/*",
    )
    for spec in refspecs:
        print(spec % prefix)

    marks_file = os.path.join(dirname, 'marks-git')

    # only ask for the marks to be imported once the file exists
    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print("")
def get_branch_tip(repo, branch):
    """Return the head to use for branch, or None if it has no known heads.

    With more than one head a warning is printed and repo.branchtip() is
    used when the repository object provides it; otherwise the first
    recorded head is returned.
    """
    global branches

    heads = branches.get(branch, None)
    if not heads:
        return None

    # verify there's only one head
    if len(heads) <= 1:
        return heads[0]

    warn("Branch '%s' has more than one head, consider merging" % branch)
    # older versions of mercurial don't have this
    if hasattr(repo, "branchtip"):
        return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Print the symbolic HEAD line and remember it in g_head.

    The current bookmark is used when there is one. Otherwise a bookmark
    is faked from the current branch name cur ('default' becomes
    'master') pointing at repo['.']; nothing is printed if that node is
    falsy.
    """
    global g_head, bmarks

    head = bookmarks.readcurrent(repo)
    if head:
        node = repo[head]
    else:
        # fake bookmark from current branch
        node = repo['.']
        if not node:
            return
        head = 'master' if cur == 'default' else cur
        bmarks[head] = node

    print("@refs/heads/%s HEAD" % head)
    g_head = (head, node)
def do_list(parser):
    """Answer the 'list' command with every ref this helper can provide.

    Fills the global bmarks (and branches, when branch tracking is on),
    then prints HEAD, branch, bookmark and tag refs followed by a blank
    line.
    """
    global branches, bmarks, mode, track_branches

    repo = parser.repo
    for bmark, node in bookmarks.listbookmarks(repo).iteritems():
        bmarks[bmark] = repo[node]

    current_branch = repo.dirstate.branch()

    list_head(repo, current_branch)

    if track_branches:
        for branch in repo.branchmap():
            branch_heads = repo.branchheads(branch)
            if len(branch_heads):
                branches[branch] = branch_heads

        for branch in branches:
            print("? refs/heads/branches/%s" % branch)

    for bmark in bmarks:
        print("? refs/heads/%s" % bmark)

    for tag, node in repo.tagslist():
        # 'tip' is skipped rather than listed as a tag
        if tag != 'tip':
            print("? refs/tags/%s" % tag)

    print("")
def do_import(parser):
    """Answer a batch of 'import <ref>' commands with a fast-import stream.

    Prints the feature header, exports each requested ref, and finishes
    with 'done'. Mercurial's encoding is switched to utf-8 while
    exporting and restored afterwards.
    """
    repo = parser.repo

    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    # Order matters: the branches prefix must be tried before the more
    # general heads prefix.
    exporters = (
        ('refs/heads/branches/', export_branch),
        ('refs/heads/', export_bookmark),
        ('refs/tags/', export_tag),
    )

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if ref == 'HEAD':
            export_head(repo)
        else:
            for ref_prefix, exporter in exporters:
                if ref.startswith(ref_prefix):
                    exporter(repo, ref[len(ref_prefix):])
                    break

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Consume a 'blob' command and store its data in blob_marks by mark."""
    global blob_marks

    # blob -> mark line
    parser.next()
    blob_mark = parser.get_mark()
    # mark -> data line
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    # step past the remainder of the data section
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add to files every path changed by repo[p1] that is not listed yet.

    Paths that are absent from repo[p1]'s manifest are skipped. New
    entries have the form {'ctx': <file context>}. files is modified in
    place; p2 is accepted but not used here.
    """
    parent = repo[p1]
    for path in parent.files():
        if path in files:
            continue
        if path not in parent.manifest():
            continue
        files[path] = { 'ctx' : parent[path] }
def parse_commit(parser):
    """Consume a 'commit' command and create the matching hg changeset.

    Reads mark, author, committer, message, optional 'from' / 'merge'
    parents and the file commands, commits an in-memory context to the
    repository, then records the new node in parsed_refs and the new
    revision in marks. Octopus merges abort via die().
    """
    global marks, blob_marks, bmarks, parsed_refs
    global mode

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            # "M <mode> :<mark> <path>" -- the path is the rest of the
            # line and may itself contain spaces.
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # "D <path>" -- same remark about spaces in the path.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback handed to memctx; raising IOError marks f as removed.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        rename = of.get('rename', None)
        return context.memfilectx(f, of['data'],
                is_link, is_exec, rename)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    if mode == 'hg':
        i = data.find('\n--HG--\n')
        if i >= 0:
            tmp = data[i + len('\n--HG--\n'):].strip()
            # Split only on the first ' : ' so that
            # "extra : <key> : <value>" keeps its payload in one piece.
            for k, v in [e.split(' : ', 1) for e in tmp.split('\n')]:
                if k == 'rename':
                    old, new = v.split(' => ', 1)
                    files[new]['rename'] = old
                elif k == 'branch':
                    extra[k] = v
                elif k == 'extra':
                    ek, ev = v.split(' : ', 1)
                    extra[ek] = urllib.unquote(ev)
            data = data[:i]

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node

    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command.

    A 'commit' that follows directly is handed to parse_commit(). A
    'from :<mark>' line records the corresponding node for the ref in
    parsed_refs. Anything else leaves the parser where it is.
    """
    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        rev = mark_to_rev(from_mark)
        parsed_refs[ref] = parser.repo.changelog.node(rev)
def parse_tag(parser):
    """Consume a 'tag' command from the stream; its contents are discarded."""
    tag_name = parser[1]
    parser.next()
    target_mark = parser.get_mark()
    parser.next()
    tagger_ident = parser.get_author()
    parser.next()
    message = parser.get_data()
    parser.next()

    # nothing to do
def do_export(parser):
    """Answer the 'export' command by applying a fast-export stream.

    Each stream command is dispatched to its parse_* handler until 'done'.
    Afterwards every parsed ref is applied (bookmarks are pushed, tags are
    created) and reported with 'ok <ref>'; finally the repository is
    pushed to the peer when there is one.
    """
    global parsed_refs, bmarks, peer

    parser.next()

    handlers = (
        ('blob', parse_blob),
        ('commit', parse_commit),
        ('reset', parse_reset),
        ('tag', parse_tag),
    )

    for line in parser.each_block('done'):
        for word, handler in handlers:
            if parser.check(word):
                handler(parser)
                break
        else:
            # 'feature' lines are accepted and ignored
            if not parser.check('feature'):
                die('unhandled export command: %s' % line)

    for ref, node in parsed_refs.iteritems():
        if ref.startswith('refs/heads/branches'):
            pass
        elif ref.startswith('refs/heads/'):
            bmark = ref[len('refs/heads/'):]
            old = bmarks[bmark].hex() if bmark in bmarks else ''
            if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
                # not reported as ok
                continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            parser.repo.tag([tag], node, None, True, None, {})
        print("ok %s" % ref)

    print("")

    if peer:
        parser.repo.push(peer, force=False)
def _get_git_config(key):
    """Return the output of `git config --get <key>`, or '' if git fails.

    A non-zero exit status (reported as CalledProcessError) is treated as
    "no value".
    """
    try:
        return subprocess.check_output(['git', 'config', '--get', key])
    except subprocess.CalledProcessError:
        return ''

def main(args):
    """Entry point of the remote helper.

    args[1] is the remote alias and args[2] the url. Sets up the global
    state, opens the repository and then serves commands read from stdin
    until an empty line or EOF. Marks are stored on exit unless the alias
    was derived from the url, in which case the working directory is
    removed instead.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer, mode, bad_mail, bad_name
    global track_branches

    alias = args[1]
    url = args[2]
    peer = None

    # Each key is queried on its own so that one unset key does not
    # prevent the other from being read.
    hg_git_compat = _get_git_config('remote-hg.hg-git-compat') == 'true\n'
    # hg-git compatibility mode turns branch tracking off
    track_branches = not hg_git_compat
    if _get_git_config('remote-hg.track-branches') == 'false\n':
        track_branches = False

    if hg_git_compat:
        mode = 'hg'
        bad_mail = 'none@none'
        bad_name = ''
    else:
        mode = 'git'
        bad_mail = 'unknown'
        bad_name = 'Unknown'

    # alias minus its first four characters equal to the url presumably
    # means an 'hg::<url>' remote without a configured name -- TODO confirm
    if alias[4:] == url:
        is_tmp = True
        alias = util.sha1(alias).hexdigest()
    else:
        is_tmp = False

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks_path = os.path.join(dirname, 'marks-hg')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()

    if not is_tmp:
        marks.store()
    else:
        shutil.rmtree(dirname)
# Only run when executed as a script, not when the file is imported.
if __name__ == '__main__':
    sys.exit(main(sys.argv))