3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
8 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
9 from optparse
import OptionParser
if sys.platform == "win32":
    # On Windows, sys.stdout is initially opened in text mode, which means
    # that every LF (\n) written to it is converted into CRLF (\r\n). That
    # makes git blow up, so use this platform-specific call to switch
    # sys.stdout to binary mode.
    import msvcrt  # only available on Windows, so import it under the guard
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
# Matches a complete "Signed-off-by: <who>" line of a log message; the "o" of
# "off" and the "b" of "by" may be upper or lower case. Group 1 captures
# everything after "Signed-off-by: ".
sob_re = re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')

# Insert a 'checkpoint' command after this many commits; 0 means never.
cfg_checkpoint_count = 0

# Write a progress message every time this many file contents were written.
cfg_export_boundary = 1000
def gitmode(flags):
    """Translate a file's flags into the git file mode string.

    ``flags`` is only tested with ``in``. A value containing 'l' maps to
    '120000'; otherwise a value containing 'x' maps to '100755'; anything
    else maps to '100644'. The 'l' check wins when both are present.
    """
    # Explicit branches instead of the "cond and a or b" chain, which only
    # works as long as every result string happens to be truthy.
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
38 sys
.stdout
.write('\n')
39 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
41 def checkpoint(count
):
43 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
44 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Convert an hg revnum to a git-fast-import rev reference.

    If ``old_marks`` holds a (truthy) entry for ``rev``, that entry is
    returned; otherwise the result is the mark string ``':<rev+1>'``.
    """
    known = old_marks.get(rev)
    if known:
        return known
    return ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """Tell whether two revisions of a file differ.

    Both arguments are passed through ``node.hex`` and the resulting values
    are compared; returns True when they are not equal.
    """
    hex_left = node.hex(f1)
    hex_right = node.hex(f2)
    return hex_left != hex_right
def split_dict(dleft, dright, l=None, c=None, r=None, match=file_mismatch):
    """Loop over our repository and find all changed and missing files.

    ``dleft`` is our own file mapping and ``dright`` the parent's. Both are
    used through ``keys()``, ``get()``, item access and ``flags(name)``.

    ``l``, ``c`` and ``r`` are the lists results are appended to, so a caller
    may pass lists in to accumulate over several calls; each one that is
    omitted starts out as a new empty list. ``match(ours, theirs)`` decides
    whether two entries of the same file differ.

    Returns the tuple ``(l, c, r)``:
      l -- files only present in ``dleft``
      c -- files present in both whose entries mismatch or whose git mode
           differs
      r -- files only present in ``dright``
    """
    # Create the lists per call: list literals as defaults in the signature
    # are built once and would be shared (and keep growing) between calls.
    if l is None:
        l = []
    if c is None:
        c = []
    if r is None:
        r = []

    for left in dleft.keys():
        right = dright.get(left, None)
        if right is None:
            # we have the file but our parent hasn't: add to left set
            l.append(left)
        elif (match(dleft[left], right)
              or gitmode(dleft.flags(left)) != gitmode(dright.flags(left))):
            # we have it but checksums mismatch: add to center set
            c.append(left)

    for right in dright.keys():
        left = dleft.get(right, None)
        if left is None:
            # if parent has file but we don't: add to right set
            r.append(right)
        # change is already handled when comparing child against parent

    return l, c, r
76 def get_filechanges(repo
,revision
,parents
,mleft
):
77 """Given some repository and revision, find all changed/deleted files."""
81 mright
=repo
.changectx(p
).manifest()
82 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
88 def get_author(logmessage
,committer
,authors
):
89 """As git distinguishes between author and committer of a patch, try to
90 extract author by detecting Signed-off-by lines.
92 This walks from the end of the log message towards the top skipping
93 empty lines. Upon the first non-empty line, it walks all Signed-off-by
94 lines upwards to find the first one. For that (if found), it extracts
95 authorship information the usual way (authors table, cleaning, etc.)
97 If no Signed-off-by line is found, this defaults to the committer.
99 This may sound stupid (and it somehow is), but in log messages we
100 accidentally may have lines in the middle starting with
101 "Signed-off-by: foo" and thus matching our detection regex. Prevent
104 loglines
=logmessage
.split('\n')
106 # from tail walk to top skipping empty lines
109 if len(loglines
[i
].strip())==0: continue
112 # walk further upwards to find first sob line, store in 'first'
115 m
=sob_re
.match(loglines
[i
])
119 # if the last non-empty line matches our Signed-Off-by regex: extract username
121 r
=fixup_user(first
.group(1),authors
)
125 def export_file_contents(ctx
,manifest
,files
,hgtags
):
129 # Skip .hgtags files. They only get us in trouble.
130 if not hgtags
and file == ".hgtags":
131 sys
.stderr
.write('Skip %s\n' % (file))
133 d
=ctx
.filectx(file).data()
134 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),file))
135 wr('data %d' % len(d
)) # had some trouble with size()
138 if count
%cfg_export
_boundary
==0:
139 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
140 if max>cfg_export_boundary
:
141 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
143 def sanitize_name(name
,what
="branch"):
144 """Sanitize input roughly according to git-check-ref-format(1)"""
147 if name
[0] == '.': return '_'+name
[1:]
151 p
=re
.compile('([[ ~^:?\\\\*]|\.\.)')
153 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
154 n
='/'.join(map(dot
,n
.split('/')))
159 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
162 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,sob
,brmap
,hgtags
,notes
):
163 def get_branchname(name
):
164 if brmap
.has_key(name
):
166 n
=sanitize_name(name
)
170 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
)
172 branch
=get_branchname(branch
)
174 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
176 if len(parents
)==0 and revision
!= 0:
177 wr('reset refs/heads/%s' % branch
)
179 wr('commit refs/heads/%s' % branch
)
180 wr('mark :%d' % (revision
+1))
182 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
183 wr('committer %s %d %s' % (user
,time
,timezone
))
184 wr('data %d' % (len(desc
)+1)) # wtf?
188 ctx
=repo
.changectx(str(revision
))
190 added
,changed
,removed
,type=[],[],[],''
192 if len(parents
) == 0:
193 # first revision: feed in full manifest
198 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
199 if len(parents
) == 1:
200 # later non-merge revision: feed in changed manifest
201 # if we have exactly one parent, just take the changes from the
202 # manifest without expensively comparing checksums
203 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
204 added
,changed
,removed
=f
[1],f
[0],f
[2]
206 else: # a merge with two parents
207 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
208 # later merge revision: feed in changed manifest
209 # for many files comparing checksums is expensive so only do it for
210 # merges where we really need it due to hg's revlog logic
211 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
212 type='thorough delta'
214 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
215 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
217 map(lambda r
: wr('D %s' % r
),removed
)
218 export_file_contents(ctx
,man
,added
,hgtags
)
219 export_file_contents(ctx
,man
,changed
,hgtags
)
222 count
=checkpoint(count
)
223 count
=generate_note(user
,time
,timezone
,revision
,ctx
,count
,notes
)
226 def generate_note(user
,time
,timezone
,revision
,ctx
,count
,notes
):
229 wr('commit refs/notes/hg')
230 wr('committer %s %d %s' % (user
,time
,timezone
))
232 wr('N inline :%d' % (revision
+1))
234 wr('data %d' % (len(hg_hash
)))
237 return checkpoint(count
)
239 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
):
242 tag
=sanitize_name(tag
,"tag")
243 # ignore latest revision
244 if tag
=='tip': continue
245 # ignore tags to nodes that are missing (ie, 'in the future')
246 if node
.encode('hex_codec') not in mapping_cache
:
247 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
250 rev
=int(mapping_cache
[node
.encode('hex_codec')])
252 ref
=revnum_to_revref(rev
, old_marks
)
254 sys
.stderr
.write('Failed to find reference for creating tag'
255 ' %s at r%d\n' % (tag
,rev
))
257 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
258 wr('reset refs/tags/%s' % tag
)
261 count
=checkpoint(count
)
264 def load_authors(filename
):
266 if not os
.path
.exists(filename
):
271 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
272 for line
in f
.readlines():
275 if line
=='' or line
[0]=='#':
279 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
281 # put key:value in cache, key without ^:
282 cache
[m
.group(1).strip()]=m
.group(2).strip()
285 sys
.stderr
.write('Loaded %d authors\n' % a
)
288 def branchtip(repo
, heads
):
289 '''return the tipmost branch head in heads'''
291 for h
in reversed(heads
):
292 if 'close' not in repo
.changelog
.read(h
)[5]:
297 def verify_heads(ui
,repo
,cache
,force
):
299 for bn
, heads
in repo
.branchmap().iteritems():
300 branches
[bn
] = branchtip(repo
, heads
)
301 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
304 # get list of hg's branches to verify, don't take all git has
310 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
311 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
312 if not force
: return False
314 # verify that branch has exactly one head
316 for h
in repo
.heads():
317 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
318 if t
.get(branch
,False):
319 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
320 repo
.changelog
.rev(h
))
321 if not force
: return False
326 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,authors
={},sob
=False,force
=False,hgtags
=False,notes
=False):
329 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
330 mapping_cache
=load_cache(mappingfile
)
331 heads_cache
=load_cache(headsfile
)
332 state_cache
=load_cache(tipfile
)
334 ui
,repo
=setup_repo(repourl
)
336 if not verify_heads(ui
,repo
,heads_cache
,force
):
340 tip
=repo
.changelog
.count()
341 except AttributeError:
344 min=int(state_cache
.get('tip',0))
346 if _max
<0 or max>tip
:
349 for rev
in range(0,max):
350 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
351 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
356 for rev
in range(min,max):
357 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,sob
,brmap
,hgtags
,notes
)
359 state_cache
['tip']=max
360 state_cache
['repo']=repourl
361 save_cache(tipfile
,state_cache
)
362 save_cache(mappingfile
,mapping_cache
)
364 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
)
366 sys
.stderr
.write('Issued %d commands\n' % c
)
370 if __name__
=='__main__':
371 def bail(parser
,opt
):
372 sys
.stderr
.write('Error: No %s option given\n' % opt
)
376 parser
=OptionParser()
378 parser
.add_option("-m","--max",type="int",dest
="max",
379 help="Maximum hg revision to import")
380 parser
.add_option("--mapping",dest
="mappingfile",
381 help="File to read last run's hg-to-git SHA1 mapping")
382 parser
.add_option("--marks",dest
="marksfile",
383 help="File to read git-fast-import's marks from")
384 parser
.add_option("--heads",dest
="headsfile",
385 help="File to read last run's git heads from")
386 parser
.add_option("--status",dest
="statusfile",
387 help="File to read status from")
388 parser
.add_option("-r","--repo",dest
="repourl",
389 help="URL of repo to import")
390 parser
.add_option("-s",action
="store_true",dest
="sob",
391 default
=False,help="Enable parsing Signed-off-by lines")
392 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
393 default
=False,help="Enable exporting .hgtags files")
394 parser
.add_option("-A","--authors",dest
="authorfile",
395 help="Read authormap from AUTHORFILE")
396 parser
.add_option("-f","--force",action
="store_true",dest
="force",
397 default
=False,help="Ignore validation errors by force")
398 parser
.add_option("-M","--default-branch",dest
="default_branch",
399 help="Set the default branch")
400 parser
.add_option("-o","--origin",dest
="origin_name",
401 help="use <name> as namespace to track upstream")
402 parser
.add_option("--hg-hash",action
="store_true",dest
="notes",
403 default
=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
405 (options
,args
)=parser
.parse_args()
408 if options
.max!=None: m
=options
.max
410 if options
.marksfile
==None: bail(parser
,'--marks')
411 if options
.mappingfile
==None: bail(parser
,'--mapping')
412 if options
.headsfile
==None: bail(parser
,'--heads')
413 if options
.statusfile
==None: bail(parser
,'--status')
414 if options
.repourl
==None: bail(parser
,'--repo')
417 if options
.authorfile
!=None:
418 a
=load_authors(options
.authorfile
)
420 if options
.default_branch
!=None:
421 set_default_branch(options
.default_branch
)
423 if options
.origin_name
!=None:
424 set_origin_name(options
.origin_name
)
426 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,options
.headsfile
,
427 options
.statusfile
,authors
=a
,sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
,notes
=options
.notes
))