3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
6 """hg2git.py - A mercurial-to-git filter for git-fast-import(1)
7 Usage: hg2git.py <hg repo url> <marks file> <heads file> <tip file>
10 from mercurial
import repo
,hg
,cmdutil
,util
,ui
,revlog
,node
11 from tempfile
import mkstemp
# Pattern used to check whether a user field already ends in a
# "Name <address>" style e-mail part.
user_re = re.compile('[^<]+ <[^>]+>$')
18 # git branch for hg's default 'HEAD' branch
# Emit a 'checkpoint' command after this many commits; a value of 0
# means no checkpoint commands are emitted at all.
cfg_checkpoint_count = 0
24 sys
.stderr
.write(__doc__
)
29 return myui
,hg
.repository(myui
,url
)
31 def get_changeset(ui
,repo
,revision
,authors
):
36 def fixup_user(user
,authors
):
38 # if we have an authors table, try to get mapping
39 # by defaulting to the current value of 'user'
40 user
=authors
.get(user
,user
)
41 if user_re
.match(user
)==None:
43 return user
+' <none@none>'
44 return user
+' <'+user
+'>'
46 node
=repo
.lookup(revision
)
47 (manifest
,user
,(time
,timezone
),files
,desc
,extra
)=repo
.changelog
.read(node
)
48 tz
="%+03d%02d" % (-timezone
/ 3600, ((-timezone
% 3600) / 60))
49 branch
=get_branch(extra
.get('branch','master'))
50 return (manifest
,fixup_user(user
,authors
),(time
,tz
),files
,desc
,branch
,extra
)
53 return x
and '100755' or '100644'
57 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
59 def checkpoint(count
):
61 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
62 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def get_parent_mark(parent, marks):
    """Return the reference to use for a parent revision.

    The lookup key is the string form of ``parent + 1``. When *marks*
    holds an entry under that key (the SHA1 from the cache), that entry
    is returned unchanged; otherwise the ``:<n>`` mark syntax is built
    from the same number.
    """
    mark_number = parent + 1
    key = str(mark_number)
    if key in marks:
        return marks[key]
    return ':%d' % mark_number
74 """See if two revisions of a file are not equal."""
75 return node
.hex(f1
)!=node
.hex(f2
)
77 def outer_set(dleft
,dright
,l
,c
,r
):
78 """Loop over our repository and find all changed and missing files."""
79 for left
in dleft
.keys():
80 right
=dright
.get(left
,None)
82 # we have the file but our parent hasn't: add to left set
84 elif mismatch(dleft
[left
],right
):
85 # we have it but checksums mismatch: add to center set
87 for right
in dright
.keys():
88 left
=dleft
.get(right
,None)
90 # if parent has file but we don't: add to right set
92 # change is already handled when comparing child against parent
95 def get_filechanges(repo
,revision
,parents
,mleft
):
96 """Given some repository and revision, find all changed/deleted files."""
100 mright
=repo
.changectx(p
).manifest()
105 l
,c
,r
=outer_set(mleft
,mright
,l
,c
,r
)
108 def export_commit(ui
,repo
,revision
,marks
,heads
,last
,max,count
,authors
):
109 (_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
)
110 parents
=repo
.changelog
.parentrevs(revision
)
112 wr('commit refs/heads/%s' % branch
)
113 wr('mark :%d' % (revision
+1))
114 wr('committer %s %d %s' % (user
,time
,timezone
))
115 wr('data %d' % (len(desc
)+1)) # wtf?
119 src
=heads
.get(branch
,'')
122 # if we have a cached head, this is an incremental import: initialize it
123 # and kill reference so we won't init it again
126 sys
.stderr
.write('Initializing branch [%s] to parent [%s]\n' %
128 link
=src
# avoid making a merge commit for incremental import
129 elif link
=='' and not heads
.has_key(branch
) and revision
>0:
130 # newly created branch and not the first one: connect to parent
131 tmp
=get_parent_mark(parents
[0],marks
)
133 sys
.stderr
.write('Link new branch [%s] to parent [%s]\n' %
135 link
=tmp
# avoid making a merge commit for branch fork
138 l
=last
.get(branch
,revision
)
140 # 1) as this commit implicitly is the child of the most recent
141 # commit of this branch, ignore this parent
142 # 2) ignore nonexistent parents
144 if p
==l
or p
==revision
or p
<0:
146 tmp
=get_parent_mark(p
,marks
)
147 # if we fork off a branch, don't merge with our parent via 'merge'
148 # as we have 'from' already above
151 sys
.stderr
.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
155 last
[branch
]=revision
157 # we need this later to write out tags
158 marks
[str(revision
)]=':%d'%(revision
+1)
160 ctx
=repo
.changectx(str(revision
))
162 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
164 sys
.stderr
.write('Exporting revision %d with %d/%d/%d added/changed/removed files\n' %
165 (revision
,len(added
),len(changed
),len(removed
)))
167 for a
in added
+changed
:
170 wr('M %s inline %s' % (gitmode(man
.execf(a
)),a
))
171 wr('data %d' % len(d
)) # had some trouble with size()
178 return checkpoint(count
)
180 def export_tags(ui
,repo
,marks_cache
,start
,end
,count
,authors
):
183 # ignore latest revision
184 if tag
=='tip': continue
185 rev
=repo
.changelog
.rev(node
)
186 # ignore those tags not in our import range
187 if rev
<start
or rev
>=end
: continue
189 ref
=marks_cache
.get(str(rev
),None)
191 sys
.stderr
.write('Failed to find reference for creating tag'
192 ' %s at r%d\n' % (tag
,rev
))
194 (_
,user
,(time
,timezone
),_
,desc
,branch
,_
)=get_changeset(ui
,repo
,rev
,authors
)
195 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
198 wr('tagger %s %d %s' % (user
,time
,timezone
))
199 msg
='hg2git created tag %s for hg revision %d on branch %s on (summary):\n\t%s' % (tag
,
200 rev
,branch
,desc
.split('\n')[0])
201 wr('data %d' % (len(msg
)+1))
204 count
=checkpoint(count
)
207 def load_cache(filename
):
209 if not os
.path
.exists(filename
):
213 for line
in f
.readlines():
215 fields
=line
.split(' ')
216 if fields
==None or not len(fields
)==2 or fields
[0][0]!=':':
217 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
219 # put key:value in cache, key without ^:
220 cache
[fields
[0][1:]]=fields
[1].split('\n')[0]
def save_cache(filename, cache):
    """Write every entry of *cache* to *filename*, one per line.

    Each entry is written as ``:<key> <value>`` followed by a newline,
    with key and value converted via ``str()``. The file is opened in
    'w+' mode, so any previous content is replaced.

    filename -- path of the file to write
    cache    -- dictionary whose items are written out
    """
    f = open(filename, 'w+')
    try:
        # Explicit loop instead of map(): map() is lazy on Python 3 and
        # would never perform the writes there.
        for key in cache.keys():
            f.write(':%s %s\n' % (str(key), str(cache.get(key))))
    finally:
        # Always release the handle so the data is flushed to disk even
        # if a write fails part-way.
        f.close()
229 def verify_heads(ui
,repo
,cache
):
231 f
=open(os
.getenv('GIT_DIR','/dev/null')+'/refs/heads/'+branch
)
232 sha1
=f
.readlines()[0].split('\n')[0]
236 for b
in cache
.keys():
237 sys
.stderr
.write('Verifying branch [%s]\n' % b
)
241 sys
.stderr
.write('Warning: Branch [%s] modified outside hg2git:'
242 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
245 def hg2git(repourl
,m
,marksfile
,headsfile
,tipfile
,authors
={}):
248 marks_cache
=load_cache(marksfile
)
249 heads_cache
=load_cache(headsfile
)
250 state_cache
=load_cache(tipfile
)
252 ui
,repo
=setup_repo(repourl
)
254 if not verify_heads(ui
,repo
,heads_cache
):
257 tip
=repo
.changelog
.count()
259 min=int(state_cache
.get('tip',0))
266 for rev
in range(min,max):
267 c
=export_commit(ui
,repo
,rev
,marks_cache
,heads_cache
,last
,tip
,c
,authors
)
269 c
=export_tags(ui
,repo
,marks_cache
,min,max,c
,authors
)
271 sys
.stderr
.write('Issued %d commands\n' % c
)
273 state_cache
['tip']=max
274 state_cache
['repo']=repourl
275 save_cache(tipfile
,state_cache
)
# Script entry point: exactly five command-line arguments are required
# (repository URL, m, marks file, heads file, tip file). Anything else
# exits with the result of usage(1); otherwise the process exit status
# is whatever hg2git() returns.
if __name__ == '__main__':
    if not len(sys.argv) == 6:
        sys.exit(usage(1))
    repourl, m, marksfile, headsfile, tipfile = sys.argv[1:]
    status = hg2git(repourl, m, marksfile, headsfile, tipfile)
    sys.exit(status)