remote-hg: match hg merge behavior
[git/mingw.git] / contrib / remote-helpers / git-remote-hg
blob247b7cbfc9e027c6d4eea010cb132ce5a92dbb99
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial import hg, ui, bookmarks, context, util, encoding
14 import re
15 import sys
16 import os
17 import json
18 import shutil
# Patterns for author/committer identities.
# Raw strings keep regex escapes such as \w and \d from being read as
# (invalid) string escapes; the compiled patterns are unchanged.

# Leading run of characters that contains no angle brackets.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by an '<email>' that ends the string.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]+)>$')
# '<keyword> [name ]<email> <digits> <+/-digits>' identity line.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.+)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an 'ERROR: ...' line to stderr and exit with status 1.

    msg is %-formatted with args only when args are given, so a message
    that was already formatted by the caller and happens to contain a
    literal '%' is reported verbatim instead of raising a format error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % (msg,))
    sys.exit(1)
def warn(msg, *args):
    """Write a 'WARNING: ...' line to stderr.

    msg is %-formatted with args only when args are given, so an already
    formatted message containing a literal '%' does not raise.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % (msg,))
def gitmode(flags):
    """Return the git file mode string for a set of file flags.

    'l' in flags wins over 'x'; anything else maps to a regular file.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
def gittz(tz):
    """Format an offset in seconds as a git '+HHMM' / '-HHMM' timezone.

    The value is negated before formatting (callers pass the tz field read
    from a Mercurial changeset; presumably seconds west of UTC).

    Sign and magnitude are handled separately: floor division and modulo on
    a negative number round away from zero, which used to turn e.g.
    UTC-05:30 into '-0630'.
    """
    offset = -tz
    if offset < 0:
        sign = '-'
    else:
        sign = '+'
    hours, rest = divmod(abs(offset), 3600)
    return '%s%02d%02d' % (sign, hours, rest // 60)
def hgmode(mode):
    """Return the flag ('x', 'l' or '') for a git file mode string.

    The mode strings found in fast-import streams carry no leading zero
    ('100755', '120000', or the short '755'), so those spellings are
    recognised in addition to the zero-prefixed ones the table originally
    held. Unknown modes map to '' (regular file).
    """
    m = {
        '100755': 'x', '0100755': 'x', '755': 'x',
        '120000': 'l', '0120000': 'l',
    }
    return m.get(mode, '')
class Marks:
    """Persistent revision <-> mark bookkeeping, stored as JSON at `path`.

    tips      -- ref name -> last exported revision
    marks     -- str(revision) -> mark
    rev_marks -- mark -> revision (reverse of `marks`)
    last_mark -- highest mark handed out so far
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0

        self.load()

    def load(self):
        """Load state from self.path; a missing file leaves the state empty."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # JSON object keys are strings; rebuild the reverse map with int revs.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = int(rev)

    def dict(self):
        """Return the serialisable state (rev_marks is derived, not stored)."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        # 'with' guarantees the data is flushed and the file closed.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of `rev`; raises KeyError when it has none."""
        return self.marks[str(rev)]

    def to_rev(self, mark):
        """Return the revision of `mark`; raises KeyError when unknown."""
        return self.rev_marks[mark]

    def get_mark(self, rev):
        """Allocate the next mark for `rev` and return it."""
        self.last_mark += 1
        self.marks[str(rev)] = self.last_mark
        # Keep the reverse map in sync, as new_mark() does.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for `rev`."""
        self.marks[str(rev)] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def is_marked(self, rev):
        return str(rev) in self.marks

    def get_tip(self, branch):
        """Return the stored tip for `branch`, or 0 when none is stored."""
        return self.tips.get(branch, 0)

    def set_tip(self, branch, tip):
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command / fast-export stream on stdin.

    The line being looked at is kept in self.line; the helpers below
    inspect it or advance to the next one.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace stripped."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, reading a new one after each, until `separator`."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating the parser yields lines up to the next empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' of the line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload of a 'data <size>' line from stdin.

        Returns None when the current line is not a 'data' line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an identity line into (user, date, tz).

        Returns None when the line does not match RAW_AUTHOR_RE. `user` is
        'name <email>', '<email>' or, for the placeholder email 'unknown',
        just the name. `tz` is the negated '+HHMM' offset in seconds.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None

        _, name, email, date, tz = m.groups()

        if email != 'unknown':
            if name:
                user = '%s <%s>' % (name, email)
            else:
                user = '<%s>' % (email)
        else:
            user = name

        # Convert the magnitude and apply the sign afterwards: floor division
        # and modulo on a negative HHMM value mis-convert zones with a
        # minutes part (e.g. -0530 came out as -4h50m).
        # The regex guarantees tz is a sign character followed by digits.
        hhmm = int(tz[1:])
        offset = (hhmm // 100) * 3600 + (hhmm % 100) * 60
        if tz[0] == '-':
            offset = -offset
        return (user, int(date), -offset)
def export_file(fc):
    """Print an inline fast-import 'M' command for the file context `fc`.

    Emits the mode/path line, a 'data <length>' line and then the file
    contents followed by a newline.
    """
    contents = fc.data()
    mode = gitmode(fc.flags())
    print("M %s inline %s" % (mode, fc.path()))
    print("data %d" % len(contents))
    print(contents)
def get_filechanges(repo, ctx, parent):
    """Compare the manifest of `ctx` with the manifest of repo[parent].

    Returns (changed, removed): `changed` is the set of paths that are new
    or whose flags or manifest entry differ, `removed` the set of paths
    present only in the parent. The parent manifest is copied before it is
    consumed, so the original is left untouched.
    """
    current = ctx.manifest()
    leftover = repo[parent].manifest().copy()
    changed = set()

    for path in current:
        if path not in leftover:
            # Only in the child: added.
            changed.add(path)
            continue
        if current.flags(path) != leftover.flags(path) or current[path] != leftover[path]:
            changed.add(path)
        # Seen in both; whatever stays in `leftover` was removed.
        del leftover[path]

    return changed, set(leftover.keys())
def fixup_user(user):
    """Normalise a user string to 'name <mail>'.

    Double quotes are dropped. A missing name becomes 'Unknown' and a
    missing mail becomes 'unknown'.
    """
    user = user.replace('"', '')
    name = mail = None

    full = AUTHOR_RE.match(user)
    if full:
        name, mail = full.group(1), full.group(2).strip()
    else:
        # No '<mail>' part: take whatever precedes the first angle bracket.
        partial = NAME_RE.match(user)
        if partial:
            name = partial.group(1).strip()

    return '%s <%s>' % (name or 'Unknown', mail or 'unknown')
def get_repo(url, alias):
    """Return the Mercurial repository to operate on for `url`.

    A local url is opened directly. Otherwise a clone kept under
    <dirname>/clone is used: created on first use, or reopened and
    refreshed with a forced pull. In the non-local case the global `peer`
    is set to the remote peer.
    """
    global dirname, peer

    myui = ui.ui()
    myui.setconfig('ui', 'interactive', 'off')

    if hg.islocal(url):
        return hg.repository(myui, url)

    clone_path = os.path.join(dirname, 'clone')
    if os.path.exists(clone_path):
        repo = hg.repository(myui, clone_path)
        peer = hg.peer(myui, {}, url)
        repo.pull(peer, heads=None, force=True)
    else:
        peer, dstpeer = hg.clone(myui, {}, url, clone_path, update=False, pull=True)
        repo = dstpeer.local()

    return repo
def rev_to_mark(rev):
    """Return the mark recorded for `rev` in the global marks table."""
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Return the revision recorded for `mark` in the global marks table."""
    rev = marks.to_rev(mark)
    return rev
def export_ref(repo, name, kind, head):
    """Print fast-import commands for the not-yet-marked revisions of a ref.

    repo -- Mercurial repository
    name -- ref name without namespace
    kind -- namespace ('branches', 'bookmarks' or 'tags' in this file)
    head -- change context the ref points at

    Revisions in the range stored-tip:head that have no mark yet are written
    as 'commit' commands. Afterwards the ref is reset to the last revision
    of the range and the stored tip is updated.
    """
    global prefix, marks

    ename = '%s/%s' % (kind, name)
    tip = marks.get_tip(ename)

    # mercurial takes too much time checking this
    if tip and tip == head.rev():
        # nothing to do
        return

    all_revs = list(repo.revs('%u:%u' % (tip, head)))
    revs = [r for r in all_revs if not marks.is_marked(r)]
    count = 0

    for rev in revs:

        c = repo[rev]
        (_manifest, user, (time, tz), _files, desc, extra) = repo.changelog.read(c.node())

        author = "%s %d %s" % (fixup_user(user), time, gittz(tz))
        if 'committer' in extra:
            # Stored on push as '<user> <time> <tz>'.
            user, time, tz = extra['committer'].rsplit(' ', 2)
            committer = "%s %s %s" % (user, time, gittz(int(tz)))
        else:
            committer = author

        parents = [p for p in repo.changelog.parentrevs(rev) if p >= 0]

        if len(parents) == 0:
            modified = c.manifest().keys()
            removed = []
        else:
            # Changes are computed against the first parent only.
            modified, removed = get_filechanges(repo, c, parents[0])

        if len(parents) == 0 and rev:
            # A parentless revision other than rev 0 starts the ref afresh.
            print('reset %s/%s' % (prefix, ename))

        print("commit %s/%s" % (prefix, ename))
        print("mark :%d" % (marks.get_mark(rev)))
        print("author %s" % (author))
        print("committer %s" % (committer))
        print("data %d" % (len(desc)))
        print(desc)

        if len(parents) > 0:
            print("from :%s" % (rev_to_mark(parents[0])))
            if len(parents) > 1:
                print("merge :%s" % (rev_to_mark(parents[1])))

        for f in modified:
            export_file(c.filectx(f))
        for f in removed:
            print("D %s" % (f))
        print("")

        count += 1
        if (count % 100 == 0):
            print("progress revision %d '%s' (%d/%d)" % (rev, name, count, len(revs)))
            print("#############################################################")

    # The ref must point at the last revision of the range even when nothing
    # new was exported. Pick it explicitly instead of relying on the loop
    # (or, in Python 2, the list-comprehension) variable still being bound.
    if revs:
        last_rev = revs[-1]
    else:
        last_rev = all_revs[-1]

    # make sure the ref is updated
    print("reset %s/%s" % (prefix, ename))
    print("from :%u" % rev_to_mark(last_rev))
    print("")

    marks.set_tip(ename, last_rev)
def export_tag(repo, tag):
    """Export the revisions leading to `tag` under the 'tags' namespace."""
    head = repo[tag]
    export_ref(repo, tag, 'tags', head)
def export_bookmark(repo, bmark):
    """Export bookmark `bmark`, using the head stored in the global bmarks."""
    export_ref(repo, bmark, 'bookmarks', bmarks[bmark])
def export_branch(repo, branch):
    """Export `branch` up to the tip chosen by get_branch_tip()."""
    head = repo[get_branch_tip(repo, branch)]
    export_ref(repo, branch, 'branches', head)
def export_head(repo):
    """Export the (name, node) pair stored in the global g_head as a bookmark."""
    bmark = g_head[0]
    node = g_head[1]
    export_ref(repo, bmark, 'bookmarks', node)
def do_capabilities(parser):
    """Print the capability list, terminated by an empty line.

    '*import-marks' is only advertised when the marks-git file already
    exists; '*export-marks' is always advertised.
    """
    global prefix, dirname

    print("import")
    print("export")
    for refspec in (
        "refs/heads/branches/*:%s/branches/*",
        "refs/heads/*:%s/bookmarks/*",
        "refs/tags/*:%s/tags/*",
    ):
        print(("refspec " + refspec) % prefix)

    marks_file = os.path.join(dirname, 'marks-git')

    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print("")
def get_branch_tip(repo, branch):
    """Return the head to export for `branch`, or None if it has no heads.

    With several heads a warning is printed and, when the repository object
    provides branchtip(), its result is returned; otherwise the first
    recorded head is used.
    """
    global branches

    heads = branches.get(branch)
    if not heads:
        return None

    # verify there's only one head
    if len(heads) > 1:
        # Pass the name as a format argument: pre-formatting it would make
        # warn() re-interpret any '%' in the branch name.
        warn("Branch '%s' has more than one head, consider merging", branch)
        # older versions of mercurial don't have this
        if hasattr(repo, "branchtip"):
            return repo.branchtip(branch)

    return heads[0]
def list_head(repo, cur):
    """Announce the current bookmark as HEAD and remember it in g_head.

    Does nothing when there is no current bookmark. `cur` is not used.
    """
    global g_head

    head = bookmarks.readcurrent(repo)
    if head:
        node = repo[head]
        print("@refs/heads/%s HEAD" % head)
        g_head = (head, node)
def do_list(parser):
    """Print the list of refs: HEAD, branches, bookmarks and tags.

    Fills the global `branches` (name -> heads) and `bmarks`
    (name -> change context) tables as a side effect. The 'tip' tag is
    skipped. The list ends with an empty line.
    """
    global branches, bmarks

    repo = parser.repo
    for branch in repo.branchmap():
        branch_heads = repo.branchheads(branch)
        if len(branch_heads):
            branches[branch] = branch_heads

    for bmark, node in bookmarks.listbookmarks(repo).items():
        bmarks[bmark] = repo[node]

    cur = repo.dirstate.branch()

    list_head(repo, cur)
    for branch in branches:
        print("? refs/heads/branches/%s" % branch)
    for bmark in bmarks:
        print("? refs/heads/%s" % bmark)

    for tag, node in repo.tagslist():
        if tag != 'tip':
            print("? refs/tags/%s" % tag)

    print("")
def do_import(parser):
    """Handle a batch of 'import <ref>' lines by printing a fast-import stream.

    Prints the feature header, exports every requested ref and finishes with
    'done'. Mercurial's encoding is forced to utf-8 while exporting and
    restored afterwards.
    """
    repo = parser.repo

    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    saved_encoding = encoding.encoding
    encoding.encoding = 'utf-8'

    # Order matters: 'refs/heads/branches/' must be tried before the more
    # general 'refs/heads/'.
    exporters = (
        ('refs/heads/branches/', export_branch),
        ('refs/heads/', export_bookmark),
        ('refs/tags/', export_tag),
    )

    # lets get all the import lines
    while parser.check('import'):
        ref = parser[1]

        if ref == 'HEAD':
            export_head(repo)
        else:
            for ref_prefix, exporter in exporters:
                if ref.startswith(ref_prefix):
                    exporter(repo, ref[len(ref_prefix):])
                    break

        parser.next()

    encoding.encoding = saved_encoding

    print('done')
def parse_blob(parser):
    """Consume a 'blob' command and keep its data in blob_marks[mark]."""
    global blob_marks

    parser.next()                    # -> mark line
    mark = parser.get_mark()
    parser.next()                    # -> data line
    blob_marks[mark] = parser.get_data()
    parser.next()
def get_merge_files(repo, p1, p2, files):
    """Add the files touched by repo[p1] that `files` does not list yet.

    Each such file still present in the manifest of repo[p1] is added to
    `files` (in place) as {'ctx': <file context>}. `p2` is not used.
    """
    parent = repo[p1]
    for path in parent.files():
        if path in files:
            continue
        if path not in parent.manifest():
            continue
        files[path] = { 'ctx' : parent[path] }
def parse_commit(parser):
    """Consume a 'commit' command and create the matching changeset.

    Reads mark, author, committer, message, optional 'from' / 'merge'
    parents and the file commands up to the empty line, commits the result
    through a memctx, then records the new node in parsed_refs[ref] and the
    commit mark in the global marks table.

    Octopus merges and unknown file commands abort via die().
    """
    global marks, blob_marks, bmarks, parsed_refs

    from_mark = merge_mark = None

    ref = parser[1]
    parser.next()

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
    if parser.check('merge'):
        merge_mark = parser.get_mark()
        parser.next()
        if parser.check('merge'):
            die('octopus merges are not supported yet')

    files = {}

    for line in parser:
        if parser.check('M'):
            # Limit the split so a path containing spaces stays in one piece.
            # NOTE(review): quoted (C-style escaped) paths are still taken
            # verbatim -- confirm whether the producer emits them.
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : hgmode(m), 'data' : blob_marks[mark] }
        elif parser.check('D'):
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # Pass the line as an argument so a '%' in it is not re-formatted.
            die('Unknown file command: %s', line)
        files[path] = f

    def getfilectx(repo, memctx, f):
        # Callback for memctx: IOError signals a deleted file.
        of = files[f]
        if 'deleted' in of:
            raise IOError
        if 'ctx' in of:
            return of['ctx']
        is_exec = of['mode'] == 'x'
        is_link = of['mode'] == 'l'
        return context.memfilectx(f, of['data'], is_link, is_exec, None)

    repo = parser.repo

    user, date, tz = author
    extra = {}

    if committer != author:
        # Read back by the export side via rsplit(' ', 2).
        extra['committer'] = "%s %u %u" % committer

    if from_mark:
        p1 = repo.changelog.node(mark_to_rev(from_mark))
    else:
        p1 = '\0' * 20

    if merge_mark:
        p2 = repo.changelog.node(mark_to_rev(merge_mark))
    else:
        p2 = '\0' * 20

    #
    # If files changed from any of the parents, hg wants to know, but in git if
    # nothing changed from the first parent, nothing changed.
    #
    if merge_mark:
        get_merge_files(repo, p1, p2, files)

    ctx = context.memctx(repo, (p1, p2), data,
            files.keys(), getfilectx,
            user, (date, tz), extra)

    # Commit with Mercurial's encoding forced to utf-8, then restore it.
    tmp = encoding.encoding
    encoding.encoding = 'utf-8'

    node = repo.commitctx(ctx)

    encoding.encoding = tmp

    rev = repo[node].rev()

    parsed_refs[ref] = node

    marks.new_mark(rev, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command.

    A reset directly followed by a commit is handed to parse_commit(). A
    reset with a 'from' line records the referenced node in parsed_refs.
    Anything else is ignored.
    """
    ref = parser[1]
    parser.next()
    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()

        rev = mark_to_rev(from_mark)
        parsed_refs[ref] = parser.repo.changelog.node(rev)
def parse_tag(parser):
    """Consume a 'tag' command; the parsed values are discarded."""
    # Every field is still read so the stream position (including the data
    # payload pulled from stdin) ends up after the tag.
    parser[1]               # name
    parser.next()
    parser.get_mark()       # from mark
    parser.next()
    parser.get_author()     # tagger
    parser.next()
    parser.get_data()       # message
    parser.next()

    # nothing to do
def do_export(parser):
    """Handle the 'export' command.

    Parses the incoming stream up to 'done', then updates bookmarks and tags
    for the refs that were seen, printing 'ok <ref>' for each one that was
    accepted, followed by an empty line. When a remote peer is set, the
    result is pushed to it.
    """
    global parsed_refs, bmarks, peer

    parser.next()

    stream_handlers = (
        ('blob', parse_blob),
        ('commit', parse_commit),
        ('reset', parse_reset),
        ('tag', parse_tag),
    )

    for line in parser.each_block('done'):
        for keyword, handler in stream_handlers:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            # 'feature' lines are accepted and ignored.
            if not parser.check('feature'):
                die('unhandled export command: %s' % line)

    for ref, node in parsed_refs.items():
        if ref.startswith('refs/heads/branches'):
            pass
        elif ref.startswith('refs/heads/'):
            bmark = ref[len('refs/heads/'):]
            old = bmarks[bmark].hex() if bmark in bmarks else ''
            if not bookmarks.pushbookmark(parser.repo, bmark, old, node):
                # Bookmark update refused: no 'ok' for this ref.
                continue
        elif ref.startswith('refs/tags/'):
            tag = ref[len('refs/tags/'):]
            parser.repo.tag([tag], node, None, True, None, {})
        print("ok %s" % ref)

    print("")

    if peer:
        parser.repo.push(peer, force=False)
def main(args):
    """Entry point: serve commands read from stdin.

    args[1] is the remote alias and args[2] the url. State is kept under
    $GIT_DIR/hg/<alias>. When the alias is just 'hg::<url>' a hashed,
    temporary directory is used and removed at the end; otherwise the marks
    are stored on exit.
    """
    global prefix, dirname, branches, bmarks
    global marks, blob_marks, parsed_refs
    global peer

    alias = args[1]
    url = args[2]
    peer = None

    # An alias of the form '<4 chars>' + url means no named remote exists.
    is_tmp = alias[4:] == url
    if is_tmp:
        alias = util.sha1(alias).hexdigest()

    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'hg', alias)
    branches = {}
    bmarks = {}
    blob_marks = {}
    parsed_refs = {}

    repo = get_repo(url, alias)
    prefix = 'refs/hg/%s' % alias

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    marks = Marks(os.path.join(dirname, 'marks-hg'))

    handlers = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for command, handler in handlers:
            if parser.check(command):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()

    if is_tmp:
        shutil.rmtree(dirname)
    else:
        marks.store()
# Run only when executed as a script, so that importing the module (e.g. for
# testing) does not start reading commands from stdin.
if __name__ == '__main__':
    sys.exit(main(sys.argv))