3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
6 """hg2git.py - A mercurial-to-git filter for git-fast-import(1)
7 Usage: hg2git.py <hg repo url> <marks file> <heads file> <tip file>
10 from mercurial
import repo
,hg
,cmdutil
,util
,ui
,revlog
,node
11 from tempfile
import mkstemp
16 # silly regex to see if user field has email address
17 user_re
=re
.compile('[^<]+ <[^>]+>$')
18 # git branch for hg's default 'HEAD' branch
20 # insert 'checkpoint' command after this many commits
21 cfg_checkpoint_count
=1000
24 sys
.stderr
.write(__doc__
)
29 return myui
,hg
.repository(myui
,url
)
31 def get_changeset(ui
,repo
,revision
):
37 if user_re
.match(user
)==None:
39 return user
+' <none@none>'
40 return user
+' <'+user
+'>'
42 node
=repo
.lookup(revision
)
43 (manifest
,user
,(time
,timezone
),files
,desc
,extra
)=repo
.changelog
.read(node
)
44 tz
="%+03d%02d" % (-timezone
/ 3600, ((-timezone
% 3600) / 60))
45 branch
=get_branch(extra
.get('branch','master'))
46 return (manifest
,fixup_user(user
),(time
,tz
),files
,desc
,branch
,extra
)
49 return x
and '100755' or '100644'
53 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
55 def checkpoint(count
):
57 if count
%cfg_checkpoint
_count
==0:
58 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def get_parent_mark(parent, marks):
    """Return the reference to use for the parent revision `parent`.

    The mark number is the revision number plus one. When `marks` has an
    entry stored under that number (as a string key), that entry is
    returned; otherwise the ':<number>' mark syntax is returned.
    """
    mark_number = parent + 1
    key = str(mark_number)
    if key in marks:
        # An entry already exists for this mark: hand it back unchanged.
        return marks[key]
    # No entry: fall back to the symbolic mark form.
    return ':%d' % mark_number
def mismatch(x, f1, f2):
    """Return True when the two file revision ids `f1` and `f2` differ.

    Both ids are converted with `node.hex` and the resulting hex strings
    are compared. The `x` argument is accepted but not used.
    """
    left_hex = node.hex(f1)
    right_hex = node.hex(f2)
    return left_hex != right_hex
73 def outer_set(dleft
,dright
,l
,r
):
74 """Loop over both manifests and find all changed and missing files."""
75 for left
in dleft
.keys():
76 right
=dright
.get(left
,None)
77 if right
==None or mismatch('A',dleft
[left
],right
):
78 # if either have the current file not in parent or the
79 # checksums differ: add it to changed files
81 for right
in dright
.keys():
82 left
=dleft
.get(right
,None)
84 # if we have a file in the parent but not our manifest,
85 # add it to deleted files; checksums are checked earlier
89 def get_filechanges(repo
,revision
,parents
,mleft
):
90 """Given some repository and revision, find all changed/deleted files."""
94 mright
=repo
.changectx(p
).manifest()
99 l
,r
=outer_set(mleft
,mright
,l
,r
)
102 def export_commit(ui
,repo
,revision
,marks
,heads
,last
,max,count
):
103 (_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
)
104 parents
=repo
.changelog
.parentrevs(revision
)
106 wr('commit refs/heads/%s' % branch
)
107 wr('mark :%d' % (revision
+1))
108 wr('committer %s %d %s' % (user
,time
,timezone
))
109 wr('data %d' % (len(desc
)+1)) # wtf?
113 src
=heads
.get(branch
,'')
116 # if we have a cached head, this is an incremental import: initialize it
117 # and kill reference so we won't init it again
120 sys
.stderr
.write('Initializing branch [%s] to parent [%s]\n' %
122 link
=src
# avoid making a merge commit for incremental import
123 elif link
=='' and not heads
.has_key(branch
) and revision
>0:
124 # newly created branch and not the first one: connect to parent
125 tmp
=get_parent_mark(parents
[0],marks
)
127 sys
.stderr
.write('Link new branch [%s] to parent [%s]\n' %
129 link
=tmp
# avoid making a merge commit for branch fork
132 l
=last
.get(branch
,revision
)
134 # 1) as this commit implicitly is the child of the most recent
135 # commit of this branch, ignore this parent
136 # 2) ignore nonexistent parents
138 if p
==l
or p
==revision
or p
<0:
140 tmp
=get_parent_mark(p
,marks
)
141 # if we fork off a branch, don't merge with our parent via 'merge'
142 # as we have 'from' already above
145 sys
.stderr
.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
149 last
[branch
]=revision
151 # we need this later to write out tags
152 marks
[str(revision
)]=':%d'%(revision
+1)
154 ctx
=repo
.changectx(str(revision
))
156 added
,removed
=get_filechanges(repo
,revision
,parents
,man
)
158 sys
.stderr
.write('Exporting revision %d with %d changed/%d removed files\n' %
159 (revision
,len(added
),len(removed
)))
164 wr('M %s inline %s' % (gitmode(man
.execf(a
)),a
))
165 wr('data %d' % len(d
)) # had some trouble with size()
172 return checkpoint(count
)
174 def export_tags(ui
,repo
,cache
,count
):
179 rev
=repo
.changelog
.rev(node
)
180 ref
=cache
.get(str(rev
),None)
182 #sys.stderr.write('Failed to find reference for creating tag'
183 # ' %s at r%d\n' % (tag,rev))
185 (_
,user
,(time
,timezone
),_
,desc
,branch
,_
)=get_changeset(ui
,repo
,rev
)
186 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
189 wr('tagger %s %d %s' % (user
,time
,timezone
))
190 msg
='hg2git created tag %s for hg revision %d on branch %s on (summary):\n\t%s' % (tag
,
191 rev
,branch
,desc
.split('\n')[0])
192 wr('data %d' % (len(msg
)+1))
195 count
=checkpoint(count
)
198 def load_cache(filename
):
200 if not os
.path
.exists(filename
):
204 for line
in f
.readlines():
206 fields
=line
.split(' ')
207 if fields
==None or not len(fields
)==2 or fields
[0][0]!=':':
208 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
210 # put key:value in cache, key without ^:
211 cache
[fields
[0][1:]]=fields
[1].split('\n')[0]
def save_cache(filename, cache):
    """Write every entry of `cache` to `filename`.

    Each entry becomes one line of the form ':<key> <value>', with key
    and value converted via str(). The file is created or truncated.

    Parameters:
      filename -- path of the file to write
      cache    -- mapping whose entries are written out
    """
    f = open(filename, 'w+')
    # try/finally rather than `with` so the handle is closed even when a
    # write fails, without requiring a newer interpreter than the rest of
    # the file needs.
    try:
        # Explicit loop: map() used only for side effects is lazy on some
        # interpreters and would then write nothing at all.
        for key in cache.keys():
            f.write(':%s %s\n' % (str(key), str(cache.get(key))))
    finally:
        f.close()
220 def verify_heads(ui
,repo
,cache
):
222 f
=open(os
.getenv('GIT_DIR','/dev/null')+'/refs/heads/'+branch
)
223 sha1
=f
.readlines()[0].split('\n')[0]
227 for b
in cache
.keys():
228 sys
.stderr
.write('Verifying branch [%s]\n' % b
)
232 sys
.stderr
.write('Warning: Branch [%s] modified outside hg2git:'
233 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
236 if __name__
=='__main__':
237 if len(sys
.argv
)!=6: sys
.exit(usage(1))
238 repourl
,m
,marksfile
,headsfile
,tipfile
=sys
.argv
[1:]
241 marks_cache
=load_cache(marksfile
)
242 heads_cache
=load_cache(headsfile
)
243 state_cache
=load_cache(tipfile
)
245 ui
,repo
=setup_repo(repourl
)
247 if not verify_heads(ui
,repo
,heads_cache
):
250 tip
=repo
.changelog
.count()
252 min=int(state_cache
.get('tip',0))
257 c
=int(state_cache
.get('count',0))
259 for rev
in range(min,max):
260 c
=export_commit(ui
,repo
,rev
,marks_cache
,heads_cache
,last
,tip
,c
)
262 c
=export_tags(ui
,repo
,marks_cache
,c
)
264 state_cache
['tip']=max
265 state_cache
['count']=c
266 state_cache
['repo']=repourl
267 save_cache(tipfile
,state_cache
)