3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
8 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
9 from optparse
import OptionParser
14 if sys
.platform
== "win32":
15 # On Windows, sys.stdout is initially opened in text mode, which means that
16 # when a LF (\n) character is written to sys.stdout, it will be converted
17 # into CRLF (\r\n). That makes git blow up, so use this platform-specific
18 # code to change the mode of sys.stdout to binary.
20 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
22 # silly regex to catch Signed-off-by lines in log message
23 sob_re
=re
.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
24 # insert 'checkpoint' command after this many commits or none at all if 0
25 cfg_checkpoint_count
=0
26 # write some progress message every this many file contents written
27 cfg_export_boundary
=1000
30 return 'l' in flags
and '120000' or 'x' in flags
and '100755' or '100644'
35 sys
.stdout
.write('\n')
36 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
38 def checkpoint(count
):
40 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
41 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Convert an hg revnum to a git-fast-import rev reference.

    rev is looked up in old_marks; when that lookup yields a truthy entry
    (an SHA1, per the original description) the entry is returned as is.
    Otherwise the reference is the mark string ':<rev+1>'.
    """
    known = old_marks.get(rev)
    if known:
        return known
    # no usable entry for this revision: refer to it by mark, numbered rev+1
    return ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """Tell whether two revisions of a file differ.

    Both arguments are passed through node.hex() and the resulting values
    are compared; returns True when they are not equal.
    """
    hex_first = node.hex(f1)
    hex_second = node.hex(f2)
    return hex_first != hex_second
55 def split_dict(dleft
,dright
,l
=[],c
=[],r
=[],match
=file_mismatch
):
56 """Loop over our repository and find all changed and missing files."""
57 for left
in dleft
.keys():
58 right
=dright
.get(left
,None)
60 # we have the file but our parent hasn't: add to left set
62 elif match(dleft
[left
],right
) or gitmode(dleft
.flags(left
))!=gitmode(dright
.flags(left
)):
63 # we have it but checksums mismatch: add to center set
65 for right
in dright
.keys():
66 left
=dleft
.get(right
,None)
68 # if parent has file but we don't: add to right set
70 # change is already handled when comparing child against parent
73 def get_filechanges(repo
,revision
,parents
,mleft
):
74 """Given some repository and revision, find all changed/deleted files."""
78 mright
=repo
.changectx(p
).manifest()
79 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
85 def get_author(logmessage
,committer
,authors
):
86 """As git distinguishes between author and committer of a patch, try to
87 extract author by detecting Signed-off-by lines.
89 This walks from the end of the log message towards the top skipping
90 empty lines. Upon the first non-empty line, it walks all Signed-off-by
91 lines upwards to find the first one. For that (if found), it extracts
92 authorship information the usual way (authors table, cleaning, etc.)
94 If no Signed-off-by line is found, this defaults to the committer.
96 This may sound stupid (and it somehow is), but in log messages we
97 accidentally may have lines in the middle starting with
98 "Signed-off-by: foo" and thus matching our detection regex. Prevent
101 loglines
=logmessage
.split('\n')
103 # from tail walk to top skipping empty lines
106 if len(loglines
[i
].strip())==0: continue
109 # walk further upwards to find first sob line, store in 'first'
112 m
=sob_re
.match(loglines
[i
])
116 # if the last non-empty line matches our Signed-Off-by regex: extract username
118 r
=fixup_user(first
.group(1),authors
)
122 def export_file_contents(ctx
,manifest
,files
,hgtags
):
126 # Skip .hgtags files. They only get us in trouble.
127 if not hgtags
and file == ".hgtags":
128 sys
.stderr
.write('Skip %s\n' % (file))
130 d
=ctx
.filectx(file).data()
131 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),file))
132 wr('data %d' % len(d
)) # had some trouble with size()
135 if count
%cfg_export
_boundary
==0:
136 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
137 if max>cfg_export_boundary
:
138 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
140 def sanitize_name(name
,what
="branch"):
141 """Sanitize input roughly according to git-check-ref-format(1)"""
144 if name
[0] == '.': return '_'+name
[1:]
148 p
=re
.compile('([[ ~^:?\\\\*]|\.\.)')
150 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
151 n
='/'.join(map(dot
,n
.split('/')))
156 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
159 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,sob
,brmap
,hgtags
):
160 def get_branchname(name
):
161 if brmap
.has_key(name
):
163 n
=sanitize_name(name
)
167 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
)
169 branch
=get_branchname(branch
)
171 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
173 if len(parents
)==0 and revision
!= 0:
174 wr('reset refs/heads/%s' % branch
)
176 wr('commit refs/heads/%s' % branch
)
177 wr('mark :%d' % (revision
+1))
179 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
180 wr('committer %s %d %s' % (user
,time
,timezone
))
181 wr('data %d' % (len(desc
)+1)) # wtf?
185 ctx
=repo
.changectx(str(revision
))
187 added
,changed
,removed
,type=[],[],[],''
189 if len(parents
) == 0:
190 # first revision: feed in full manifest
195 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
196 if len(parents
) == 1:
197 # later non-merge revision: feed in changed manifest
198 # if we have exactly one parent, just take the changes from the
199 # manifest without expensively comparing checksums
200 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
201 added
,changed
,removed
=f
[1],f
[0],f
[2]
203 else: # a merge with two parents
204 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
205 # later merge revision: feed in changed manifest
206 # for many files comparing checksums is expensive so only do it for
207 # merges where we really need it due to hg's revlog logic
208 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
209 type='thorough delta'
211 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
212 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
214 map(lambda r
: wr('D %s' % r
),removed
)
215 export_file_contents(ctx
,man
,added
,hgtags
)
216 export_file_contents(ctx
,man
,changed
,hgtags
)
219 return checkpoint(count
)
221 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
):
224 tag
=sanitize_name(tag
,"tag")
225 # ignore latest revision
226 if tag
=='tip': continue
227 # ignore tags to nodes that are missing (ie, 'in the future')
228 if node
.encode('hex_codec') not in mapping_cache
:
229 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
232 rev
=int(mapping_cache
[node
.encode('hex_codec')])
234 ref
=revnum_to_revref(rev
, old_marks
)
236 sys
.stderr
.write('Failed to find reference for creating tag'
237 ' %s at r%d\n' % (tag
,rev
))
239 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
240 wr('reset refs/tags/%s' % tag
)
243 count
=checkpoint(count
)
246 def load_authors(filename
):
248 if not os
.path
.exists(filename
):
253 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
254 for line
in f
.readlines():
257 if line
=='' or line
[0]=='#':
261 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
263 # put key:value in cache, key without ^:
264 cache
[m
.group(1).strip()]=m
.group(2).strip()
267 sys
.stderr
.write('Loaded %d authors\n' % a
)
270 def branchtip(repo
, heads
):
271 '''return the tipmost branch head in heads'''
273 for h
in reversed(heads
):
274 if 'close' not in repo
.changelog
.read(h
)[5]:
279 def verify_heads(ui
,repo
,cache
,force
):
281 for bn
, heads
in repo
.branchmap().iteritems():
282 branches
[bn
] = branchtip(repo
, heads
)
283 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
286 # get list of hg's branches to verify, don't take all git has
292 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
293 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
294 if not force
: return False
296 # verify that branch has exactly one head
298 for h
in repo
.heads():
299 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
300 if t
.get(branch
,False):
301 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
302 repo
.changelog
.rev(h
))
303 if not force
: return False
308 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,authors
={},sob
=False,force
=False,hgtags
=False):
311 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
312 mapping_cache
=load_cache(mappingfile
)
313 heads_cache
=load_cache(headsfile
)
314 state_cache
=load_cache(tipfile
)
316 ui
,repo
=setup_repo(repourl
)
318 if not verify_heads(ui
,repo
,heads_cache
,force
):
322 tip
=repo
.changelog
.count()
323 except AttributeError:
326 min=int(state_cache
.get('tip',0))
328 if _max
<0 or max>tip
:
331 for rev
in range(0,max):
332 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
333 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
338 for rev
in range(min,max):
339 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,sob
,brmap
,hgtags
)
341 state_cache
['tip']=max
342 state_cache
['repo']=repourl
343 save_cache(tipfile
,state_cache
)
344 save_cache(mappingfile
,mapping_cache
)
346 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
)
348 sys
.stderr
.write('Issued %d commands\n' % c
)
352 if __name__
=='__main__':
353 def bail(parser
,opt
):
354 sys
.stderr
.write('Error: No %s option given\n' % opt
)
358 parser
=OptionParser()
360 parser
.add_option("-m","--max",type="int",dest
="max",
361 help="Maximum hg revision to import")
362 parser
.add_option("--mapping",dest
="mappingfile",
363 help="File to read last run's hg-to-git SHA1 mapping")
364 parser
.add_option("--marks",dest
="marksfile",
365 help="File to read git-fast-import's marks from")
366 parser
.add_option("--heads",dest
="headsfile",
367 help="File to read last run's git heads from")
368 parser
.add_option("--status",dest
="statusfile",
369 help="File to read status from")
370 parser
.add_option("-r","--repo",dest
="repourl",
371 help="URL of repo to import")
372 parser
.add_option("-s",action
="store_true",dest
="sob",
373 default
=False,help="Enable parsing Signed-off-by lines")
374 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
375 default
=False,help="Enable exporting .hgtags files")
376 parser
.add_option("-A","--authors",dest
="authorfile",
377 help="Read authormap from AUTHORFILE")
378 parser
.add_option("-f","--force",action
="store_true",dest
="force",
379 default
=False,help="Ignore validation errors by force")
380 parser
.add_option("-M","--default-branch",dest
="default_branch",
381 help="Set the default branch")
382 parser
.add_option("-o","--origin",dest
="origin_name",
383 help="use <name> as namespace to track upstream")
385 (options
,args
)=parser
.parse_args()
388 if options
.max!=None: m
=options
.max
390 if options
.marksfile
==None: bail(parser
,'--marks')
391 if options
.mappingfile
==None: bail(parser
,'--mapping')
392 if options
.headsfile
==None: bail(parser
,'--heads')
393 if options
.statusfile
==None: bail(parser
,'--status')
394 if options
.repourl
==None: bail(parser
,'--repo')
397 if options
.authorfile
!=None:
398 a
=load_authors(options
.authorfile
)
400 if options
.default_branch
!=None:
401 set_default_branch(options
.default_branch
)
403 if options
.origin_name
!=None:
404 set_origin_name(options
.origin_name
)
406 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,options
.headsfile
,
407 options
.statusfile
,authors
=a
,sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
))