3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
6 """hg2git.py - A mercurial-to-git filter for git-fast-import(1)
7 Usage: hg2git.py <hg repo url> <max> <marks file> <heads file> <tip file>
10 from mercurial
import repo
,hg
,cmdutil
,util
,ui
,revlog
11 from tempfile
import mkstemp
16 # silly regex to see if user field has email address
17 user_re
=re
.compile('[^<]+ <[^>]+>$')
18 # git branch for hg's default 'HEAD' branch
20 # insert 'checkpoint' command after this many commits
21 cfg_checkpoint_count
=1000
24 sys
.stderr
.write(__doc__
)
29 return myui
,hg
.repository(myui
,url
)
31 def get_changeset(ui
,repo
,revision
):
37 if user_re
.match(user
)==None:
39 return user
+' <none@none>'
40 return user
+' <'+user
+'>'
42 node
=repo
.lookup(revision
)
43 (manifest
,user
,(time
,timezone
),files
,desc
,extra
)=repo
.changelog
.read(node
)
44 tz
="%+03d%02d" % (-timezone
/ 3600, ((-timezone
% 3600) / 60))
45 branch
=get_branch(extra
.get('branch','master'))
46 return (manifest
,fixup_user(user
),(time
,tz
),files
,desc
,branch
,extra
)
49 return x
and '100755' or '100644'
53 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
55 def checkpoint(count
):
57 if count
%cfg_checkpoint
_count
==0:
58 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
63 def get_parent_mark(parent
,marks
):
64 p
=marks
.get(str(parent
),None)
66 # if we didn't see parent previously, assume we saw it in this run
70 def export_commit(ui
,repo
,revision
,marks
,heads
,last
,max,count
):
71 sys
.stderr
.write('Exporting revision %d (tip %d) as [:%d]\n' % (revision
,max,revision
+1))
73 (_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
)
74 parents
=repo
.changelog
.parentrevs(revision
)
76 # we need this later to write out tags
77 marks
[str(revision
)]=':%d'%(revision
+1)
79 wr('commit refs/heads/%s' % branch
)
80 wr('mark :%d' % (revision
+1))
81 wr('committer %s %d %s' % (user
,time
,timezone
))
82 wr('data %d' % (len(desc
)+1)) # wtf?
86 src
=heads
.get(branch
,'')
89 # if we have a cached head, this is an incremental import: initialize it
90 # and kill reference so we won't init it again
93 elif not heads
.has_key(branch
) and revision
>0:
94 # newly created branch and not the first one: connect to parent
95 tmp
=get_parent_mark(parents
[0],marks
)
97 sys
.stderr
.write('Link new branch [%s] to parent [%s]\n' %
99 link
=tmp
# avoid making a merge commit for branch fork
102 l
=last
.get(branch
,revision
)
104 # 1) as this commit implicitly is the child of the most recent
105 # commit of this branch, ignore this parent
106 # 2) ignore nonexistent parents
108 if p
==l
or p
==revision
or p
<0:
110 tmp
=get_parent_mark(p
,marks
)
111 # if we fork off a branch, don't merge via 'merge' as we have
112 # 'from' already above
115 sys
.stderr
.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
119 last
[branch
]=revision
122 # just wipe the branch clean, all full manifest contents
125 ctx
=repo
.changectx(str(revision
))
128 #for f in man.keys():
129 # fctx=ctx.filectx(f)
131 # wr('M %s inline %s' % (gitmode(man.execf(f)),f))
132 # wr('data %d' % len(d)) # had some trouble with size()
135 for fctx
in ctx
.filectxs():
138 wr('M %s inline %s' % (gitmode(man
.execf(f
)),f
))
139 wr('data %d' % len(d
)) # had some trouble with size()
143 return checkpoint(count
)
145 def export_tags(ui
,repo
,cache
,count
):
150 rev
=repo
.changelog
.rev(node
)
151 ref
=cache
.get(str(rev
),None)
153 sys
.stderr
.write('Failed to find reference for creating tag'
154 ' %s at r%d\n' % (tag
,rev
))
156 (_
,user
,(time
,timezone
),_
,desc
,branch
,_
)=get_changeset(ui
,repo
,rev
)
157 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
160 wr('tagger %s %d %s' % (user
,time
,timezone
))
161 msg
='hg2git created tag %s for hg revision %d on branch %s on (summary):\n\t%s' % (tag
,
162 rev
,branch
,desc
.split('\n')[0])
163 wr('data %d' % (len(msg
)+1))
166 count
=checkpoint(count
)
169 def load_cache(filename
):
171 if not os
.path
.exists(filename
):
175 for line
in f
.readlines():
177 fields
=line
.split(' ')
178 if fields
==None or not len(fields
)==2 or fields
[0][0]!=':':
179 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
181 # put key:value in cache, key without ^:
182 cache
[fields
[0][1:]]=fields
[1].split('\n')[0]
def save_cache(filename,cache):
    """Write the key/value pairs of `cache` to `filename`, one per line.

    Each entry is written as ':<key> <value>' followed by a newline; both
    key and value are converted with str(). Any existing file content is
    replaced, since the file is opened in 'w+' mode.

    filename -- path of the file to (over)write
    cache    -- dict-like object providing items()
    """
    f = open(filename, 'w+')
    try:
        # Use an explicit loop instead of map(): map() is lazy on Python 3,
        # so relying on it for side effects would write nothing there.
        for key, value in cache.items():
            f.write(':%s %s\n' % (str(key), str(value)))
    finally:
        # Always release the handle so the data is flushed to disk even if
        # a write fails part-way through.
        f.close()
191 def verify_heads(ui
,repo
,cache
):
193 f
=open(os
.getenv('GIT_DIR','/dev/null')+'/refs/heads/'+branch
)
194 sha1
=f
.readlines()[0].split('\n')[0]
198 for b
in cache
.keys():
199 sys
.stderr
.write('Verifying branch [%s]\n' % b
)
203 sys
.stderr
.write('Warning: Branch [%s] modified outside hg2git:'
204 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
207 if __name__
=='__main__':
208 if len(sys
.argv
)!=6: sys
.exit(usage(1))
209 repourl
,m
,marksfile
,headsfile
,tipfile
=sys
.argv
[1:]
212 marks_cache
=load_cache(marksfile
)
213 heads_cache
=load_cache(headsfile
)
214 state_cache
=load_cache(tipfile
)
216 ui
,repo
=setup_repo(repourl
)
218 if not verify_heads(ui
,repo
,heads_cache
):
221 tip
=repo
.changelog
.count()
223 min=int(state_cache
.get('tip',0))
228 c
=int(state_cache
.get('count',0))
230 for rev
in range(min,max):
231 c
=export_commit(ui
,repo
,rev
,marks_cache
,heads_cache
,last
,tip
,c
)
233 c
=export_tags(ui
,repo
,marks_cache
,c
)
235 state_cache
['tip']=max
236 state_cache
['count']=c
237 state_cache
['repo']=repourl
238 save_cache(tipfile
,state_cache
)