3 # Copyright (c) 2012 Felipe Contreras
6 # Inspired by Rocco Rutte's hg-fast-export
8 # Just copy to your ~/bin, or anywhere in your $PATH.
9 # Then you can clone with:
10 # git clone hg::/path/to/mercurial/repo/
12 from mercurial
import hg
, ui
, bookmarks
, context
, util
, encoding
# Leading run of characters that contains neither '<' nor '>'.
NAME_RE = re.compile(r'^([^<>]+)')

# Optional name, an optional single space, then an '<...>' part that must
# end the string.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]+)>$')

# Five groups: a leading word, an optional name, the text between '<' and
# '>', a run of digits and a signed run of digits.  Raw strings are used so
# that \w and \d reach the regex engine verbatim instead of being parsed as
# (invalid) string escape sequences.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (?:(.+)? )?<(.+)> (\d+) ([+-]\d+)')
25 sys
.stderr
.write('ERROR: %s\n' % (msg
% args
))
29 sys
.stderr
.write('WARNING: %s\n' % (msg
% args
))
32 return 'l' in flags
and '120000' or 'x' in flags
and '100755' or '100644'
35 return '%+03d%02d' % (-tz
/ 3600, -tz
% 3600 / 60)
38 m
= { '0100755': 'x', '0120000': 'l' }
39 return m
.get(mode
, '')
43 def __init__(self
, path
):
53 if not os
.path
.exists(self
.path
):
56 tmp
= json
.load(open(self
.path
))
58 self
.tips
= tmp
['tips']
59 self
.marks
= tmp
['marks']
60 self
.last_mark
= tmp
['last-mark']
62 for rev
, mark
in self
.marks
.iteritems():
63 self
.rev_marks
[mark
] = int(rev
)
66 return { 'tips': self
.tips
, 'marks': self
.marks
, 'last-mark' : self
.last_mark
}
69 json
.dump(self
.dict(), open(self
.path
, 'w'))
72 return str(self
.dict())
def from_rev(self, rev):
    """Return the mark stored for revision *rev*.

    The lookup key is the string form of *rev*; a KeyError propagates
    when no mark is stored under that key.
    """
    key = str(rev)
    return self.marks[key]
def to_rev(self, mark):
    """Return the revision stored for *mark* in the reverse mark table.

    A KeyError propagates when *mark* is not present.
    """
    revision = self.rev_marks[mark]
    return revision
80 def get_mark(self
, rev
):
82 self
.marks
[str(rev
)] = self
.last_mark
85 def new_mark(self
, rev
, mark
):
86 self
.marks
[str(rev
)] = mark
87 self
.rev_marks
[mark
] = rev
def is_marked(self, rev):
    """Return True when a mark is stored for revision *rev*.

    The mark table is keyed by the string form of the revision.  The
    membership operator is used instead of the deprecated dict.has_key(),
    which no longer exists in Python 3; the result is the same boolean.
    """
    return str(rev) in self.marks
def get_tip(self, branch):
    """Return the tip recorded for *branch*, or 0 when none is recorded."""
    known_tips = self.tips
    if branch in known_tips:
        return known_tips[branch]
    return 0
def set_tip(self, branch, tip):
    """Record *tip* as the tip of *branch*, replacing any earlier entry."""
    self.tips.update({branch: tip})
101 def __init__(self
, repo
):
103 self
.line
= self
.get_line()
106 return sys
.stdin
.readline().strip()
def __getitem__(self, i):
    """Return item *i* of the current line split on whitespace."""
    words = self.line.split()
    return words[i]
def check(self, word):
    """Return True when the current line begins with *word*."""
    current = self.line
    return current.startswith(word)
114 def each_block(self
, separator
):
115 while self
.line
!= separator
:
117 self
.line
= self
.get_line()
120 return self
.each_block('')
123 self
.line
= self
.get_line()
124 if self
.line
== 'done':
128 i
= self
.line
.index(':') + 1
129 return int(self
.line
[i
:])
132 if not self
.check('data'):
134 i
= self
.line
.index(' ') + 1
135 size
= int(self
.line
[i
:])
136 return sys
.stdin
.read(size
)
138 def get_author(self
):
139 m
= RAW_AUTHOR_RE
.match(self
.line
)
142 _
, name
, email
, date
, tz
= m
.groups()
144 if email
!= 'unknown':
146 user
= '%s <%s>' % (name
, email
)
148 user
= '<%s>' % (email
)
153 tz
= ((tz
/ 100) * 3600) + ((tz
% 100) * 60)
154 return (user
, int(date
), -tz
)
158 print "M %s inline %s" % (gitmode(fc
.flags()), fc
.path())
159 print "data %d" % len(d
)
162 def get_filechanges(repo
, ctx
, parent
):
168 prev
= repo
[parent
].manifest().copy()
172 if (cur
.flags(fn
) != prev
.flags(fn
) or cur
[fn
] != prev
[fn
]):
177 removed |
= set(prev
.keys())
179 return added | modified
, removed
181 def fixup_user(user
):
182 user
= user
.replace('"', '')
184 m
= AUTHOR_RE
.match(user
)
187 mail
= m
.group(2).strip()
189 m
= NAME_RE
.match(user
)
191 name
= m
.group(1).strip()
198 return '%s <%s>' % (name
, mail
)
200 def get_repo(url
, alias
):
204 myui
.setconfig('ui', 'interactive', 'off')
207 repo
= hg
.repository(myui
, url
)
209 local_path
= os
.path
.join(dirname
, 'clone')
210 if not os
.path
.exists(local_path
):
211 peer
, dstpeer
= hg
.clone(myui
, {}, url
, local_path
, update
=False, pull
=True)
212 repo
= dstpeer
.local()
214 repo
= hg
.repository(myui
, local_path
)
215 peer
= hg
.peer(myui
, {}, url
)
216 repo
.pull(peer
, heads
=None, force
=True)
220 def rev_to_mark(rev
):
222 return marks
.from_rev(rev
)
224 def mark_to_rev(mark
):
226 return marks
.to_rev(mark
)
228 def export_ref(repo
, name
, kind
, head
):
231 ename
= '%s/%s' % (kind
, name
)
232 tip
= marks
.get_tip(ename
)
234 # mercurial takes too much time checking this
235 if tip
and tip
== head
.rev():
238 revs
= repo
.revs('%u:%u' % (tip
, head
))
241 revs
= [rev
for rev
in revs
if not marks
.is_marked(rev
)]
246 (manifest
, user
, (time
, tz
), files
, desc
, extra
) = repo
.changelog
.read(c
.node())
247 rev_branch
= extra
['branch']
249 author
= "%s %d %s" % (fixup_user(user
), time
, gittz(tz
))
250 if 'committer' in extra
:
251 user
, time
, tz
= extra
['committer'].rsplit(' ', 2)
252 committer
= "%s %s %s" % (user
, time
, gittz(int(tz
)))
256 parents
= [p
for p
in repo
.changelog
.parentrevs(rev
) if p
>= 0]
258 if len(parents
) == 0:
259 modified
= c
.manifest().keys()
262 modified
, removed
= get_filechanges(repo
, c
, parents
[0])
264 if len(parents
) == 0 and rev
:
265 print 'reset %s/%s' % (prefix
, ename
)
267 print "commit %s/%s" % (prefix
, ename
)
268 print "mark :%d" % (marks
.get_mark(rev
))
269 print "author %s" % (author
)
270 print "committer %s" % (committer
)
271 print "data %d" % (len(desc
))
275 print "from :%s" % (rev_to_mark(parents
[0]))
277 print "merge :%s" % (rev_to_mark(parents
[1]))
280 export_file(c
.filectx(f
))
286 if (count
% 100 == 0):
287 print "progress revision %d '%s' (%d/%d)" % (rev
, name
, count
, len(revs
))
288 print "#############################################################"
290 # make sure the ref is updated
291 print "reset %s/%s" % (prefix
, ename
)
292 print "from :%u" % rev_to_mark(rev
)
295 marks
.set_tip(ename
, rev
)
def export_tag(repo, tag):
    """Hand *tag* to export_ref as a ref of kind 'tags'.

    The head passed along is whatever repo[tag] evaluates to.
    """
    head = repo[tag]
    export_ref(repo, tag, 'tags', head)
300 def export_bookmark(repo
, bmark
):
302 export_ref(repo
, bmark
, 'bookmarks', head
)
304 def export_branch(repo
, branch
):
305 tip
= get_branch_tip(repo
, branch
)
307 export_ref(repo
, branch
, 'branches', head
)
309 def export_head(repo
):
311 export_ref(repo
, g_head
[0], 'bookmarks', g_head
[1])
313 def do_capabilities(parser
):
314 global prefix
, dirname
318 print "refspec refs/heads/branches/*:%s/branches/*" % prefix
319 print "refspec refs/heads/*:%s/bookmarks/*" % prefix
320 print "refspec refs/tags/*:%s/tags/*" % prefix
322 path
= os
.path
.join(dirname
, 'marks-git')
324 if os
.path
.exists(path
):
325 print "*import-marks %s" % path
326 print "*export-marks %s" % path
330 def get_branch_tip(repo
, branch
):
333 heads
= branches
.get(branch
, None)
337 # verify there's only one head
339 warn("Branch '%s' has more than one head, consider merging" % branch
)
340 # older versions of mercurial don't have this
341 if hasattr(repo
, "branchtip"):
342 return repo
.branchtip(branch
)
346 def list_head(repo
, cur
):
349 head
= bookmarks
.readcurrent(repo
)
353 print "@refs/heads/%s HEAD" % head
354 g_head
= (head
, node
)
357 global branches
, bmarks
360 for branch
in repo
.branchmap():
361 heads
= repo
.branchheads(branch
)
363 branches
[branch
] = heads
365 for bmark
, node
in bookmarks
.listbookmarks(repo
).iteritems():
366 bmarks
[bmark
] = repo
[node
]
368 cur
= repo
.dirstate
.branch()
371 for branch
in branches
:
372 print "? refs/heads/branches/%s" % branch
374 print "? refs/heads/%s" % bmark
376 for tag
, node
in repo
.tagslist():
379 print "? refs/tags/%s" % tag
383 def do_import(parser
):
386 path
= os
.path
.join(dirname
, 'marks-git')
389 if os
.path
.exists(path
):
390 print "feature import-marks=%s" % path
391 print "feature export-marks=%s" % path
394 tmp
= encoding
.encoding
395 encoding
.encoding
= 'utf-8'
397 # lets get all the import lines
398 while parser
.check('import'):
403 elif ref
.startswith('refs/heads/branches/'):
404 branch
= ref
[len('refs/heads/branches/'):]
405 export_branch(repo
, branch
)
406 elif ref
.startswith('refs/heads/'):
407 bmark
= ref
[len('refs/heads/'):]
408 export_bookmark(repo
, bmark
)
409 elif ref
.startswith('refs/tags/'):
410 tag
= ref
[len('refs/tags/'):]
411 export_tag(repo
, tag
)
415 encoding
.encoding
= tmp
419 def parse_blob(parser
):
423 mark
= parser
.get_mark()
425 data
= parser
.get_data()
426 blob_marks
[mark
] = data
430 def get_merge_files(repo
, p1
, p2
, files
):
431 for e
in repo
[p1
].files():
433 if e
not in repo
[p1
].manifest():
435 f
= { 'ctx' : repo
[p1
][e
] }
438 def parse_commit(parser
):
439 global marks
, blob_marks
, bmarks
, parsed_refs
441 from_mark
= merge_mark
= None
446 commit_mark
= parser
.get_mark()
448 author
= parser
.get_author()
450 committer
= parser
.get_author()
452 data
= parser
.get_data()
454 if parser
.check('from'):
455 from_mark
= parser
.get_mark()
457 if parser
.check('merge'):
458 merge_mark
= parser
.get_mark()
460 if parser
.check('merge'):
461 die('octopus merges are not supported yet')
466 if parser
.check('M'):
467 t
, m
, mark_ref
, path
= line
.split(' ')
468 mark
= int(mark_ref
[1:])
469 f
= { 'mode' : hgmode(m
), 'data' : blob_marks
[mark
] }
470 elif parser
.check('D'):
471 t
, path
= line
.split(' ')
472 f
= { 'deleted' : True }
474 die('Unknown file command: %s' % line
)
477 def getfilectx(repo
, memctx
, f
):
483 is_exec
= of
['mode'] == 'x'
484 is_link
= of
['mode'] == 'l'
485 return context
.memfilectx(f
, of
['data'], is_link
, is_exec
, None)
489 user
, date
, tz
= author
492 if committer
!= author
:
493 extra
['committer'] = "%s %u %u" % committer
496 p1
= repo
.changelog
.node(mark_to_rev(from_mark
))
501 p2
= repo
.changelog
.node(mark_to_rev(merge_mark
))
506 # If files changed from any of the parents, hg wants to know, but in git if
507 # nothing changed from the first parent, nothing changed.
510 get_merge_files(repo
, p1
, p2
, files
)
512 ctx
= context
.memctx(repo
, (p1
, p2
), data
,
513 files
.keys(), getfilectx
,
514 user
, (date
, tz
), extra
)
516 tmp
= encoding
.encoding
517 encoding
.encoding
= 'utf-8'
519 node
= repo
.commitctx(ctx
)
521 encoding
.encoding
= tmp
523 rev
= repo
[node
].rev()
525 parsed_refs
[ref
] = node
527 marks
.new_mark(rev
, commit_mark
)
529 def parse_reset(parser
):
533 if parser
.check('commit'):
536 if not parser
.check('from'):
538 from_mark
= parser
.get_mark()
541 node
= parser
.repo
.changelog
.node(mark_to_rev(from_mark
))
542 parsed_refs
[ref
] = node
544 def parse_tag(parser
):
547 from_mark
= parser
.get_mark()
549 tagger
= parser
.get_author()
551 data
= parser
.get_data()
556 def do_export(parser
):
557 global parsed_refs
, bmarks
, peer
561 for line
in parser
.each_block('done'):
562 if parser
.check('blob'):
564 elif parser
.check('commit'):
566 elif parser
.check('reset'):
568 elif parser
.check('tag'):
570 elif parser
.check('feature'):
573 die('unhandled export command: %s' % line
)
575 for ref
, node
in parsed_refs
.iteritems():
576 if ref
.startswith('refs/heads/branches'):
578 elif ref
.startswith('refs/heads/'):
579 bmark
= ref
[len('refs/heads/'):]
581 old
= bmarks
[bmark
].hex()
584 if not bookmarks
.pushbookmark(parser
.repo
, bmark
, old
, node
):
586 elif ref
.startswith('refs/tags/'):
587 tag
= ref
[len('refs/tags/'):]
588 parser
.repo
.tag([tag
], node
, None, True, None, {})
594 parser
.repo
.push(peer
, force
=False)
597 global prefix
, dirname
, branches
, bmarks
598 global marks
, blob_marks
, parsed_refs
607 alias
= util
.sha1(alias
).hexdigest()
611 gitdir
= os
.environ
['GIT_DIR']
612 dirname
= os
.path
.join(gitdir
, 'hg', alias
)
618 repo
= get_repo(url
, alias
)
619 prefix
= 'refs/hg/%s' % alias
621 if not os
.path
.exists(dirname
):
624 marks_path
= os
.path
.join(dirname
, 'marks-hg')
625 marks
= Marks(marks_path
)
627 parser
= Parser(repo
)
629 if parser
.check('capabilities'):
630 do_capabilities(parser
)
631 elif parser
.check('list'):
633 elif parser
.check('import'):
635 elif parser
.check('export'):
638 die('unhandled command: %s' % line
)
644 shutil
.rmtree(dirname
)
# Script entry point: run main() on the raw argument vector and hand
# whatever it returns to sys.exit() as the process exit status.
status = main(sys.argv)
sys.exit(status)