hg2git.py: Add support for user-defined hg author -> git author mapping
[fast-export/rorcz.git] / hg2git.py
blob046ad1f1b6452d01240a434625bcfc05903cdee5
1 #!/usr/bin/env python
3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
4 # License: GPLv2
6 """hg2git.py - A mercurial-to-git filter for git-fast-import(1)
Usage: hg2git.py <hg repo url> <max> <marks file> <heads file> <tip file>
8 """
10 from mercurial import repo,hg,cmdutil,util,ui,revlog,node
11 from tempfile import mkstemp
12 import re
13 import sys
14 import os
# Loose pattern for a user field that already looks like 'Name <address>'.
user_re = re.compile('[^<]+ <[^>]+>$')
# Name of the git branch used for hg's default 'HEAD' branch.
cfg_master = 'master'
# Emit a 'checkpoint' command every this many commits; 0 turns that off.
cfg_checkpoint_count = 0
def usage(ret):
    """Write the module usage text to stderr and return *ret*.

    *ret* is handed back unchanged so a caller can combine printing the
    usage and choosing an exit status, e.g. sys.exit(usage(1)).
    """
    # The module docstring is None when docstrings are stripped
    # (python -OO); writing None would raise TypeError, so fall back to
    # an empty string instead of crashing on the error path.
    sys.stderr.write(__doc__ or '')
    return ret
def setup_repo(url):
    """Create a ui object and open the repository at *url* with it.

    Returns the pair (ui object, repository).
    """
    interface = ui.ui()
    repository = hg.repository(interface, url)
    return interface, repository
def get_changeset(ui, repo, revision, authors):
    """Read one changeset and return it in the form the exporters use.

    ui is accepted for signature symmetry and is not used here.
    repo is the repository to read from, revision the revision to look up.
    authors is an optional mapping from hg user strings to replacement
    author strings; None disables the mapping.

    Returns the tuple
    (manifest, user, (time, tz), files, desc, branch, extra) where user
    has been normalised to 'Name <address>' form, tz is a '+HHMM'/'-HHMM'
    string and branch is the git branch name to commit to.
    """
    def get_branch(name):
        # hg's 'HEAD' branch is exported under the configured git branch
        if name == 'HEAD':
            return cfg_master
        return name

    def fixup_user(user, authors):
        if authors is not None:
            # if we have an authors table, try to get the mapping,
            # defaulting to the current value of 'user'
            user = authors.get(user, user)
        if user_re.match(user) is not None:
            return user
        if '@' not in user:
            # no address at all: invent a placeholder one
            return user + ' <none@none>'
        # looks like a bare address: use it as both name and address
        return user + ' <' + user + '>'

    def format_tz(timezone):
        # The stored offset is negated to get the sign written to git.
        # Split sign and magnitude first: floor-dividing a negative
        # offset would round the hours away from zero and turn e.g.
        # -03:30 into '-0430'.
        offset = -timezone
        if offset < 0:
            sign = '-'
        else:
            sign = '+'
        hours, seconds = divmod(abs(offset), 3600)
        return '%s%02d%02d' % (sign, hours, seconds // 60)

    nodeid = repo.lookup(revision)
    (manifest, user, (when, timezone), files, desc, extra) = \
        repo.changelog.read(nodeid)
    branch = get_branch(extra.get('branch', 'master'))
    return (manifest, fixup_user(user, authors), (when, format_tz(timezone)),
            files, desc, branch, extra)
def gitmode(x):
    """Return the git file mode string for a flag value.

    A truthy *x* gives '100755', anything else gives '100644'.
    """
    if x:
        return '100755'
    return '100644'
def wr(msg=''):
    """Write *msg* followed by a newline to standard output.

    Called without an argument it emits just an empty line.
    """
    sys.stdout.write('%s\n' % (msg,))
    # debugging aid: echo every written line to stderr
    #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
def checkpoint(count):
    """Count one more issued command and emit a checkpoint when due.

    A 'checkpoint' command plus an empty line is written whenever
    checkpoints are enabled (cfg_checkpoint_count > 0) and the new count
    is a multiple of cfg_checkpoint_count.

    Returns the incremented count.
    """
    total = count + 1
    due = cfg_checkpoint_count > 0 and total % cfg_checkpoint_count == 0
    if not due:
        return total
    sys.stderr.write("Checkpoint after %d commits\n" % total)
    wr('checkpoint')
    wr()
    return total
def get_parent_mark(parent, marks):
    """Return the reference to use for the parent revision *parent*.

    If *marks* has an entry for the parent's mark number (parent+1) that
    entry is returned, otherwise the ':%d' mark syntax for that number.
    """
    # NOTE(review): the lookup key is the mark number (parent+1), not the
    # hg revision number -- confirm this matches how callers fill *marks*.
    number = parent + 1
    key = str(number)
    if key in marks:
        return marks[key]
    return ':%d' % number
def mismatch(f1, f2):
    """Return True if the hex forms of the two file revisions differ."""
    hex_left = node.hex(f1)
    hex_right = node.hex(f2)
    return hex_left != hex_right
def outer_set(dleft, dright, l, c, r):
    """Sort the file names of two manifests into three result lists.

    dleft is our manifest, dright the parent's. Names are appended to
    l: present in dleft but not in dright,
    c: present in both but with mismatching revisions,
    r: present in dright but not in dleft.

    The three lists are extended in place and also returned as (l, c, r).
    """
    for path in dleft.keys():
        theirs = dright.get(path, None)
        if theirs is None:
            # we have the file, the parent does not
            l.append(path)
            continue
        if mismatch(dleft[path], theirs):
            # both have it, but the revisions differ
            c.append(path)
    # only files missing on our side are of interest here; changed files
    # were already found while walking our own manifest above
    r.extend([path for path in dright.keys()
              if dleft.get(path, None) is None])
    return l, c, r
def get_filechanges(repo, revision, parents, mleft):
    """Find the added, changed and removed files of a revision.

    repo is used to fetch each parent's manifest, parents is the sequence
    of parent revision numbers (negative entries mean "no parent") and
    mleft is the manifest of the revision itself. revision is unused.

    Returns three lists (added, changed, removed) of file names. With
    more than one real parent the results of the comparisons against
    each parent are accumulated in the same lists.
    """
    added, changed, removed = [], [], []
    real_parents = [p for p in parents if p >= 0]
    if not real_parents:
        # No parent to compare against (e.g. the very first revision):
        # every file in the manifest is new. Without this the loop below
        # never runs and the revision would be exported without files.
        return sorted(mleft.keys()), changed, removed
    for p in real_parents:
        mright = repo.changectx(p).manifest()
        added, changed, removed = outer_set(mleft, mright,
                                            added, changed, removed)
    return added, changed, removed
def export_commit(ui, repo, revision, marks, heads, last, max, count, authors):
    """Write the fast-import commands for one revision.

    Emits the commit header and message, an optional 'from' line, 'merge'
    lines for additional parents, and 'M'/'D' lines for the file changes.
    marks, heads and last are updated in place for later revisions.
    max is accepted but not used here.

    Returns the command count after passing it through checkpoint().
    """
    changeset = get_changeset(ui, repo, revision, authors)
    (_, user, (when, tz), files, desc, branch, _) = changeset
    parents = repo.changelog.parentrevs(revision)

    wr('commit refs/heads/%s' % branch)
    wr('mark :%d' % (revision + 1))
    wr('committer %s %d %s' % (user, when, tz))
    # one byte more than the message: wr() appends a newline to it
    wr('data %d' % (len(desc) + 1))
    wr(desc)
    wr()

    link = ''
    cached_head = heads.get(branch, '')
    if cached_head != '':
        # a cached head means an incremental import: start the branch from
        # it and blank the entry so this happens only once
        wr('from %s' % cached_head)
        heads[branch] = ''
        sys.stderr.write('Initializing branch [%s] to parent [%s]\n' %
                         (branch, cached_head))
        # remember it so no merge with the same commit is written below
        link = cached_head
    elif branch not in heads and revision > 0:
        # branch not seen before and not the first revision: hook it up
        # to its first parent
        link = get_parent_mark(parents[0], marks)
        wr('from %s' % link)
        sys.stderr.write('Link new branch [%s] to parent [%s]\n' %
                         (branch, link))

    if parents:
        previous = last.get(branch, revision)
        for p in parents:
            # skip nonexistent parents, and the most recent commit of this
            # branch, which is implicitly the parent already
            if p < 0 or p == previous or p == revision:
                continue
            mark = get_parent_mark(p, marks)
            # already connected through the 'from' line above
            if mark == link:
                continue
            sys.stderr.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
                             (branch, mark, p))
            wr('merge %s' % mark)

    last[branch] = revision
    heads[branch] = ''
    # needed later to write out tags
    # NOTE(review): stored under str(revision) while get_parent_mark()
    # looks up str(parent+1) -- confirm which key is intended.
    marks[str(revision)] = ':%d' % (revision + 1)

    ctx = repo.changectx(str(revision))
    man = ctx.manifest()
    added, changed, removed = get_filechanges(repo, revision, parents, man)

    sys.stderr.write('Exporting revision %d with %d/%d/%d added/changed/removed files\n' %
                     (revision, len(added), len(changed), len(removed)))

    for path in added + changed:
        blob = ctx.filectx(path).data()
        wr('M %s inline %s' % (gitmode(man.execf(path)), path))
        # length taken from the data itself rather than from size()
        wr('data %d' % len(blob))
        wr(blob)

    for path in removed:
        wr('D %s' % path)

    wr()
    return checkpoint(count)
def export_tags(ui, repo, marks_cache, start, end, count, authors):
    """Write a fast-import 'tag' command for each tag in the import range.

    The 'tip' tag and tags whose revision lies outside [start, end) are
    skipped, as are tags whose revision has no entry in marks_cache
    (a message is written to stderr for those).

    Returns the command count, passed through checkpoint() once per
    exported tag.
    """
    for tag, tagnode in repo.tagslist():
        # 'tip' only names the latest revision
        if tag == 'tip':
            continue
        rev = repo.changelog.rev(tagnode)
        # only tags inside the range being imported
        if not (start <= rev < end):
            continue

        ref = marks_cache.get(str(rev), None)
        if ref is None:
            sys.stderr.write('Failed to find reference for creating tag %s at r%d\n' %
                             (tag, rev))
            continue

        changeset = get_changeset(ui, repo, rev, authors)
        (_, user, (when, tz), _, desc, branch, _) = changeset
        sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' %
                         (tag, rev, ref))
        wr('tag %s' % tag)
        wr('from %s' % ref)
        wr('tagger %s %d %s' % (user, when, tz))
        summary = desc.split('\n')[0]
        msg = 'hg2git created tag %s for hg revision %d on branch %s on (summary):\n\t%s' % (
            tag, rev, branch, summary)
        # one byte more than the message: wr() appends a newline to it
        wr('data %d' % (len(msg) + 1))
        wr(msg)
        wr()
        count = checkpoint(count)
    return count
def load_cache(filename):
    """Load a cache file of ':key value' lines into a dict.

    Each valid line consists of exactly two space-separated fields, the
    first starting with ':'. The key is stored without the leading ':'
    and the value without its trailing newline. Invalid lines are
    reported on stderr with their 1-based line number and skipped.

    Returns an empty dict if *filename* does not exist.
    """
    cache = {}
    if not os.path.exists(filename):
        return cache
    f = open(filename, 'r')
    try:
        for index, line in enumerate(f.readlines()):
            fields = line.split(' ')
            # startswith() also copes with an empty first field (a line
            # beginning with a space), where indexing [0] would raise
            if len(fields) != 2 or not fields[0].startswith(':'):
                sys.stderr.write('Invalid file format in [%s], line %d\n' %
                                 (filename, index + 1))
                continue
            # put key:value in cache, key without the leading ':'
            cache[fields[0][1:]] = fields[1].split('\n')[0]
    finally:
        # close the file even if reading fails part way through
        f.close()
    return cache
def save_cache(filename, cache):
    """Write *cache* to *filename* as one ':key value' line per entry.

    Keys and values are converted with str(). An existing file is
    overwritten.
    """
    f = open(filename, 'w+')
    try:
        # plain loop instead of map(): the writes are side effects and
        # must happen even where map() is evaluated lazily
        for key in cache.keys():
            f.write(':%s %s\n' % (str(key), str(cache.get(key))))
    finally:
        # close the file even if a write fails
        f.close()
def verify_heads(ui, repo, cache):
    """Compare the cached branch heads with the refs under $GIT_DIR.

    cache maps branch names to the SHA1 recorded for them. For every
    branch the first line of $GIT_DIR/refs/heads/<branch> is read and
    compared; a difference only produces a warning on stderr.
    ui and repo are accepted but not used.

    Returns True if every branch ref could be read, False if at least
    one ref file could not be opened or read.
    """
    def getsha1(branch):
        f = open(os.getenv('GIT_DIR', '/dev/null') + '/refs/heads/' + branch)
        try:
            # readline() yields '' for an empty file instead of failing
            return f.readline().split('\n')[0]
        finally:
            f.close()

    verified = True
    for b in cache.keys():
        sys.stderr.write('Verifying branch [%s]\n' % b)
        try:
            sha1 = getsha1(b)
        except (IOError, OSError):
            # report an unreadable ref through the return value rather
            # than aborting with a traceback; keep checking the rest
            sys.stderr.write('Error: Cannot read head of branch [%s]\n' % b)
            verified = False
            continue
        c = cache.get(b)
        if sha1 != c:
            sys.stderr.write('Warning: Branch [%s] modified outside hg2git:'
                             '\n%s (repo) != %s (cache)\n' % (b, sha1, c))
    return verified
def hg2git(repourl, m, marksfile, headsfile, tipfile, authors=None):
    """Export revisions of the hg repository at *repourl* to stdout.

    m is the upper revision bound (exclusive) as a number or numeric
    string; a negative value, or one beyond the number of revisions in
    the repository, means "up to the tip". marksfile, headsfile and
    tipfile name the cache files; the export starts at the 'tip' value
    stored in tipfile (0 if there is none) and tipfile is rewritten with
    the new upper bound and the repository url afterwards.
    authors is an optional hg user -> git author mapping.

    Returns 0 on success and 1 if the cached heads could not be verified.
    """
    if authors is None:
        authors = {}
    _max = int(m)

    marks_cache = load_cache(marksfile)
    heads_cache = load_cache(headsfile)
    state_cache = load_cache(tipfile)

    myui, repo = setup_repo(repourl)

    if not verify_heads(myui, repo, heads_cache):
        return 1

    tip = repo.changelog.count()

    first = int(state_cache.get('tip', 0))
    stop = _max
    if _max < 0 or _max > tip:
        # never walk past the revisions the repository actually has
        stop = tip

    # number of commands issued so far; must exist before the first
    # export call and also when the revision range is empty
    c = 0
    last = {}
    for rev in range(first, stop):
        c = export_commit(myui, repo, rev, marks_cache, heads_cache, last,
                          tip, c, authors)

    c = export_tags(myui, repo, marks_cache, first, stop, c, authors)

    sys.stderr.write('Issued %d commands\n' % c)

    state_cache['tip'] = stop
    state_cache['repo'] = repourl
    save_cache(tipfile, state_cache)

    return 0
if __name__ == '__main__':
    # expects exactly: repo url, max revision, marks file, heads file,
    # tip file
    args = sys.argv[1:]
    if len(args) != 5:
        sys.exit(usage(1))
    sys.exit(hg2git(*args))