3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
6 """hg2git.py - A mercurial-to-git filter for git-fast-import(1)
7 Usage: hg2git.py <hg repo url> <marks file> <heads file> <tip file>
10 from mercurial
import repo
,hg
,cmdutil
,util
,ui
,revlog
11 from tempfile
import mkstemp
16 # silly regex to see if user field has email address
17 user_re
=re
.compile('[^<]+ <[^>]+>$')
18 # git branch for hg's default 'HEAD' branch
20 # insert 'checkpoint' command after this many commits
21 cfg_checkpoint_count
=1000
24 sys
.stderr
.write(__doc__
)
29 return myui
,hg
.repository(myui
,url
)
31 def get_changeset(ui
,repo
,revision
):
37 if user_re
.match(user
)==None:
39 return user
+' <none@none>'
40 return user
+' <'+user
+'>'
42 node
=repo
.lookup(revision
)
43 (manifest
,user
,(time
,timezone
),files
,desc
,extra
)=repo
.changelog
.read(node
)
44 tz
="%+03d%02d" % (-timezone
/ 3600, ((-timezone
% 3600) / 60))
45 branch
=get_branch(extra
.get('branch','master'))
46 return (manifest
,fixup_user(user
),(time
,tz
),files
,desc
,branch
,extra
)
49 return x
and '100755' or '100644'
53 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
55 def checkpoint(count
):
57 if count
%cfg_checkpoint
_count
==0:
58 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
63 def get_parent_mark(parent
,marks
):
64 p
=marks
.get(str(parent
),None)
66 # if we didn't see parent previously, assume we saw it in this run
70 def export_commit(ui
,repo
,revision
,marks
,heads
,last
,max,count
):
71 sys
.stderr
.write('Exporting revision %d (tip %d) as [:%d]\n' % (revision
,max,revision
+1))
73 (_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
)
74 parents
=repo
.changelog
.parentrevs(revision
)
76 # we need this later to write out tags
77 marks
[str(revision
)]=':%d'%(revision
+1)
79 wr('commit refs/heads/%s' % branch
)
80 wr('mark :%d' % (revision
+1))
81 wr('committer %s %d %s' % (user
,time
,timezone
))
82 wr('data %d' % (len(desc
)+1)) # wtf?
86 src
=heads
.get(branch
,'')
89 # if we have a cached head, this is an incremental import: initialize it
90 # and kill reference so we won't init it again
93 sys
.stderr
.write('Initializing branch [%s] to parent [%s]\n' %
95 link
=src
# avoid making a merge commit for incremental import
96 elif not heads
.has_key(branch
) and revision
>0:
97 # newly created branch and not the first one: connect to parent
98 tmp
=get_parent_mark(parents
[0],marks
)
100 sys
.stderr
.write('Link new branch [%s] to parent [%s]\n' %
102 link
=tmp
# avoid making a merge commit for branch fork
105 l
=last
.get(branch
,revision
)
107 # 1) as this commit implicitly is the child of the most recent
108 # commit of this branch, ignore this parent
109 # 2) ignore nonexistent parents
111 if p
==l
or p
==revision
or p
<0:
113 tmp
=get_parent_mark(p
,marks
)
114 # if we fork off a branch, don't merge via 'merge' as we have
115 # 'from' already above
118 sys
.stderr
.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
122 last
[branch
]=revision
125 # just wipe the branch clean, all full manifest contents
128 ctx
=repo
.changectx(str(revision
))
131 #for f in man.keys():
132 # fctx=ctx.filectx(f)
134 # wr('M %s inline %s' % (gitmode(man.execf(f)),f))
135 # wr('data %d' % len(d)) # had some trouble with size()
138 for fctx
in ctx
.filectxs():
141 wr('M %s inline %s' % (gitmode(man
.execf(f
)),f
))
142 wr('data %d' % len(d
)) # had some trouble with size()
146 return checkpoint(count
)
148 def export_tags(ui
,repo
,cache
,count
):
153 rev
=repo
.changelog
.rev(node
)
154 ref
=cache
.get(str(rev
),None)
156 sys
.stderr
.write('Failed to find reference for creating tag'
157 ' %s at r%d\n' % (tag
,rev
))
159 (_
,user
,(time
,timezone
),_
,desc
,branch
,_
)=get_changeset(ui
,repo
,rev
)
160 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
163 wr('tagger %s %d %s' % (user
,time
,timezone
))
164 msg
='hg2git created tag %s for hg revision %d on branch %s on (summary):\n\t%s' % (tag
,
165 rev
,branch
,desc
.split('\n')[0])
166 wr('data %d' % (len(msg
)+1))
169 count
=checkpoint(count
)
172 def load_cache(filename
):
174 if not os
.path
.exists(filename
):
178 for line
in f
.readlines():
180 fields
=line
.split(' ')
181 if fields
==None or not len(fields
)==2 or fields
[0][0]!=':':
182 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
184 # put key:value in cache, key without ^:
185 cache
[fields
[0][1:]]=fields
[1].split('\n')[0]
def save_cache(filename, cache):
    """Write every entry of `cache` to `filename` as ':<key> <value>' lines.

    filename -- path of the file to write; it is opened with mode 'w+', so
                any existing content is truncated.
    cache    -- mapping whose keys and values are written through str().

    Returns None.
    """
    f = open(filename, 'w+')
    try:
        # explicit loop rather than map(): map() used only for side effects
        # is lazy on Python 3 and would silently write nothing
        for key in cache.keys():
            f.write(':%s %s\n' % (str(key), str(cache.get(key))))
    finally:
        # always release the handle so the data is flushed to disk even if
        # a write fails part-way
        f.close()
194 def verify_heads(ui
,repo
,cache
):
196 f
=open(os
.getenv('GIT_DIR','/dev/null')+'/refs/heads/'+branch
)
197 sha1
=f
.readlines()[0].split('\n')[0]
201 for b
in cache
.keys():
202 sys
.stderr
.write('Verifying branch [%s]\n' % b
)
206 sys
.stderr
.write('Warning: Branch [%s] modified outside hg2git:'
207 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
210 if __name__
=='__main__':
211 if len(sys
.argv
)!=6: sys
.exit(usage(1))
212 repourl
,m
,marksfile
,headsfile
,tipfile
=sys
.argv
[1:]
215 marks_cache
=load_cache(marksfile
)
216 heads_cache
=load_cache(headsfile
)
217 state_cache
=load_cache(tipfile
)
219 ui
,repo
=setup_repo(repourl
)
221 if not verify_heads(ui
,repo
,heads_cache
):
224 tip
=repo
.changelog
.count()
226 min=int(state_cache
.get('tip',0))
231 c
=int(state_cache
.get('count',0))
233 for rev
in range(min,max):
234 c
=export_commit(ui
,repo
,rev
,marks_cache
,heads_cache
,last
,tip
,c
)
236 c
=export_tags(ui
,repo
,marks_cache
,c
)
238 state_cache
['tip']=max
239 state_cache
['count']=c
240 state_cache
['repo']=repourl
241 save_cache(tipfile
,state_cache
)