3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
8 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
9 from optparse
import OptionParser
# Pattern for a Signed-off-by line; group 1 captures everything after the
# "Signed-off-by: " prefix (the O and B may be upper or lower case).
sob_re = re.compile(r'^Signed-[Oo]ff-[Bb]y: (.+)$')

# Number of commits between 'checkpoint' commands; 0 means never insert one.
cfg_checkpoint_count = 0

# Emit a progress message each time this many file contents were written.
cfg_export_boundary = 1000
22 return 'l' in flags
and '120000' or 'x' in flags
and '100755' or '100644'
28 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
30 def checkpoint(count
):
32 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
33 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Return the git-fast-import reference to use for hg revnum `rev`.

    If `old_marks` holds a truthy entry for `rev`, that entry is returned
    unchanged. Otherwise (no entry, or a falsy one) the mark reference
    ':<rev+1>' is returned.
    """
    known_ref = old_marks.get(rev)
    if known_ref:
        return known_ref
    # Marks are numbered one higher than the hg revision number.
    return ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """Return True if the two file revisions differ, False if they match.

    Both arguments are passed through node.hex() and the resulting values
    are compared for inequality.
    """
    first_hex = node.hex(f1)
    second_hex = node.hex(f2)
    return first_hex != second_hex
47 def split_dict(dleft
,dright
,l
=[],c
=[],r
=[],match
=file_mismatch
):
48 """Loop over our repository and find all changed and missing files."""
49 for left
in dleft
.keys():
50 right
=dright
.get(left
,None)
52 # we have the file but our parent hasn't: add to left set
54 elif match(dleft
[left
],right
):
55 # we have it but checksums mismatch: add to center set
57 for right
in dright
.keys():
58 left
=dleft
.get(right
,None)
60 # if parent has file but we don't: add to right set
62 # change is already handled when comparing child against parent
65 def get_filechanges(repo
,revision
,parents
,mleft
):
66 """Given some repository and revision, find all changed/deleted files."""
70 mright
=repo
.changectx(p
).manifest()
71 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
77 def get_author(logmessage
,committer
,authors
):
78 """As git distincts between author and committer of a patch, try to
79 extract author by detecting Signed-off-by lines.
81 This walks from the end of the log message towards the top skipping
82 empty lines. Upon the first non-empty line, it walks all Signed-off-by
83 lines upwards to find the first one. For that (if found), it extracts
84 authorship information the usual way (authors table, cleaning, etc.)
86 If no Signed-off-by line is found, this defaults to the committer.
88 This may sound stupid (and it somehow is), but in log messages we
89 accidentially may have lines in the middle starting with
90 "Signed-off-by: foo" and thus matching our detection regex. Prevent
93 loglines
=logmessage
.split('\n')
95 # from tail walk to top skipping empty lines
98 if len(loglines
[i
].strip())==0: continue
101 # walk further upwards to find first sob line, store in 'first'
104 m
=sob_re
.match(loglines
[i
])
108 # if the last non-empty line matches our Signed-Off-by regex: extract username
110 r
=fixup_user(first
.group(1),authors
)
114 def export_file_contents(ctx
,manifest
,files
):
118 # Skip .hgtags files. They only get us in trouble.
119 if file == ".hgtags":
120 sys
.stderr
.write('Skip %s\n' % (file))
122 d
=ctx
.filectx(file).data()
123 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),file))
124 wr('data %d' % len(d
)) # had some trouble with size()
127 if count
%cfg_export
_boundary
==0:
128 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
129 if max>cfg_export_boundary
:
130 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
132 def sanitize_name(name
,what
="branch"):
133 """Sanitize input roughly according to git-check-ref-format(1)"""
136 if name
[0] == '.': return '_'+name
[1:]
140 p
=re
.compile('([[ ~^:?*]|\.\.)')
142 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
143 n
='/'.join(map(dot
,n
.split('/')))
148 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
151 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,sob
,brmap
):
152 def get_branchname(name
):
153 if brmap
.has_key(name
):
155 n
=sanitize_name(name
)
159 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
)
161 branch
=get_branchname(branch
)
163 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
165 if len(parents
)==0 and revision
!= 0:
166 wr('reset refs/heads/%s' % branch
)
168 wr('commit refs/heads/%s' % branch
)
169 wr('mark :%d' % (revision
+1))
171 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
172 wr('committer %s %d %s' % (user
,time
,timezone
))
173 wr('data %d' % (len(desc
)+1)) # wtf?
178 # Sort the parents based on revision ids so that we always get the
179 # same resulting git repo, no matter how the revisions were
181 parents
.sort(key
=repo
.changelog
.node
, reverse
=True)
183 ctx
=repo
.changectx(str(revision
))
185 added
,changed
,removed
,type=[],[],[],''
187 if len(parents
) == 0:
188 # first revision: feed in full manifest
193 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
194 if len(parents
) == 1:
195 # later non-merge revision: feed in changed manifest
196 # if we have exactly one parent, just take the changes from the
197 # manifest without expensively comparing checksums
198 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
199 added
,changed
,removed
=f
[1],f
[0],f
[2]
201 else: # a merge with two parents
202 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
203 # later merge revision: feed in changed manifest
204 # for many files comparing checksums is expensive so only do it for
205 # merges where we really need it due to hg's revlog logic
206 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
207 type='thorough delta'
209 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
210 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
212 map(lambda r
: wr('D %s' % r
),removed
)
213 export_file_contents(ctx
,man
,added
)
214 export_file_contents(ctx
,man
,changed
)
217 return checkpoint(count
)
219 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
):
222 tag
=sanitize_name(tag
,"tag")
223 # ignore latest revision
224 if tag
=='tip': continue
225 # ignore tags to nodes that are missing (ie, 'in the future')
226 if node
.encode('hex_codec') not in mapping_cache
:
227 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
230 rev
=int(mapping_cache
[node
.encode('hex_codec')])
232 ref
=revnum_to_revref(rev
, old_marks
)
234 sys
.stderr
.write('Failed to find reference for creating tag'
235 ' %s at r%d\n' % (tag
,rev
))
237 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
238 wr('reset refs/tags/%s' % tag
)
241 count
=checkpoint(count
)
244 def load_authors(filename
):
246 if not os
.path
.exists(filename
):
250 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
251 for line
in f
.readlines():
255 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
257 # put key:value in cache, key without ^:
258 cache
[m
.group(1).strip()]=m
.group(2).strip()
260 sys
.stderr
.write('Loaded %d authors\n' % l
)
263 def verify_heads(ui
,repo
,cache
,force
):
264 branches
=repo
.branchtags()
265 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
268 # get list of hg's branches to verify, don't take all git has
274 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
275 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
276 if not force
: return False
278 # verify that branch has exactly one head
280 for h
in repo
.heads():
281 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
282 if t
.get(branch
,False):
283 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
284 repo
.changelog
.rev(h
))
285 if not force
: return False
290 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,authors
={},sob
=False,force
=False):
293 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
294 mapping_cache
=load_cache(mappingfile
)
295 heads_cache
=load_cache(headsfile
)
296 state_cache
=load_cache(tipfile
)
298 ui
,repo
=setup_repo(repourl
)
300 if not verify_heads(ui
,repo
,heads_cache
,force
):
304 tip
=repo
.changelog
.count()
305 except AttributeError:
308 min=int(state_cache
.get('tip',0))
310 if _max
<0 or max>tip
:
313 for rev
in range(0,max):
314 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
315 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
320 for rev
in range(min,max):
321 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,sob
,brmap
)
323 state_cache
['tip']=max
324 state_cache
['repo']=repourl
325 save_cache(tipfile
,state_cache
)
326 save_cache(mappingfile
,mapping_cache
)
328 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
)
330 sys
.stderr
.write('Issued %d commands\n' % c
)
334 if __name__
=='__main__':
335 def bail(parser
,opt
):
336 sys
.stderr
.write('Error: No %s option given\n' % opt
)
340 parser
=OptionParser()
342 parser
.add_option("-m","--max",type="int",dest
="max",
343 help="Maximum hg revision to import")
344 parser
.add_option("--mapping",dest
="mappingfile",
345 help="File to read last run's hg-to-git SHA1 mapping")
346 parser
.add_option("--marks",dest
="marksfile",
347 help="File to read git-fast-import's marks from")
348 parser
.add_option("--heads",dest
="headsfile",
349 help="File to read last run's git heads from")
350 parser
.add_option("--status",dest
="statusfile",
351 help="File to read status from")
352 parser
.add_option("-r","--repo",dest
="repourl",
353 help="URL of repo to import")
354 parser
.add_option("-s",action
="store_true",dest
="sob",
355 default
=False,help="Enable parsing Signed-off-by lines")
356 parser
.add_option("-A","--authors",dest
="authorfile",
357 help="Read authormap from AUTHORFILE")
358 parser
.add_option("-f","--force",action
="store_true",dest
="force",
359 default
=False,help="Ignore validation errors by force")
360 parser
.add_option("-M","--default-branch",dest
="default_branch",
361 help="Set the default branch")
362 parser
.add_option("-o","--origin",dest
="origin_name",
363 help="use <name> as namespace to track upstream")
365 (options
,args
)=parser
.parse_args()
368 if options
.max!=None: m
=options
.max
370 if options
.marksfile
==None: bail(parser
,'--marks')
371 if options
.mappingfile
==None: bail(parser
,'--mapping')
372 if options
.headsfile
==None: bail(parser
,'--heads')
373 if options
.statusfile
==None: bail(parser
,'--status')
374 if options
.repourl
==None: bail(parser
,'--repo')
377 if options
.authorfile
!=None:
378 a
=load_authors(options
.authorfile
)
380 if options
.default_branch
!=None:
381 set_default_branch(options
.default_branch
)
383 if options
.origin_name
!=None:
384 set_origin_name(options
.origin_name
)
386 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,options
.headsfile
,
387 options
.statusfile
,authors
=a
,sob
=options
.sob
,force
=options
.force
))