3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
8 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
9 from optparse
import OptionParser
14 if sys
.platform
== "win32":
15 # On Windows, sys.stdout is initially opened in text mode, which means that
16 # when a LF (\n) character is written to sys.stdout, it will be converted
17 # into CRLF (\r\n). That makes git blow up, so use this platform-specific
18 # code to change the mode of sys.stdout to binary.
20 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
# Pattern recognising a Signed-off-by trailer line in a log message; group 1
# captures everything that follows the "Signed-off-by: " prefix.
sob_re = re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')

# Number of commits between 'checkpoint' commands; 0 means never checkpoint.
cfg_checkpoint_count = 0

# Emit a progress message each time this many file contents have been written.
cfg_export_boundary = 1000
30 return 'l' in flags
and '120000' or 'x' in flags
and '100755' or '100644'
38 sys
.stdout
.write('\n')
39 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
41 def checkpoint(count
):
43 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
44 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Translate an hg revision number into a git-fast-import revision reference.

    When ``old_marks`` holds a truthy entry for ``rev`` that entry is returned
    unchanged (presumably the SHA1 recorded by an earlier run -- confirm
    against the code that loads the marks file).  Otherwise a mark reference
    of the form ``:N`` is produced, where ``N`` is ``rev + 1``.
    """
    known_ref = old_marks.get(rev)
    if known_ref:
        return known_ref
    # No usable stored reference: fall back to the mark for this revision.
    return ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """Report whether two revisions of a file differ.

    Both arguments are run through ``node.hex()`` and the results compared;
    the return value is True when they are unequal and False otherwise.
    """
    first_hex = node.hex(f1)
    second_hex = node.hex(f2)
    return first_hex != second_hex
58 def split_dict(dleft
,dright
,l
=[],c
=[],r
=[],match
=file_mismatch
):
59 """Loop over our repository and find all changed and missing files."""
60 for left
in dleft
.keys():
61 right
=dright
.get(left
,None)
63 # we have the file but our parent hasn't: add to left set
65 elif match(dleft
[left
],right
) or gitmode(dleft
.flags(left
))!=gitmode(dright
.flags(left
)):
66 # we have it but checksums mismatch: add to center set
68 for right
in dright
.keys():
69 left
=dleft
.get(right
,None)
71 # if parent has file but we don't: add to right set
73 # change is already handled when comparing child against parent
76 def get_filechanges(repo
,revision
,parents
,mleft
):
77 """Given some repository and revision, find all changed/deleted files."""
81 mright
=repo
.changectx(p
).manifest()
82 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
88 def get_author(logmessage
,committer
,authors
):
89 """As git distinguishes between author and committer of a patch, try to
90 extract author by detecting Signed-off-by lines.
92 This walks from the end of the log message towards the top skipping
93 empty lines. Upon the first non-empty line, it walks all Signed-off-by
94 lines upwards to find the first one. For that (if found), it extracts
95 authorship information the usual way (authors table, cleaning, etc.)
97 If no Signed-off-by line is found, this defaults to the committer.
99 This may sound stupid (and it somehow is), but in log messages we
100 accidentally may have lines in the middle starting with
101 "Signed-off-by: foo" and thus matching our detection regex. Prevent
104 loglines
=logmessage
.split('\n')
106 # from tail walk to top skipping empty lines
109 if len(loglines
[i
].strip())==0: continue
112 # walk further upwards to find first sob line, store in 'first'
115 m
=sob_re
.match(loglines
[i
])
119 # if the last non-empty line matches our Signed-Off-by regex: extract username
121 r
=fixup_user(first
.group(1),authors
)
125 def export_file_contents(ctx
,manifest
,files
,hgtags
,encoding
=''):
129 # Skip .hgtags files. They only get us in trouble.
130 if not hgtags
and file == ".hgtags":
131 sys
.stderr
.write('Skip %s\n' % (file))
133 d
=ctx
.filectx(file).data()
135 filename
=file.decode(encoding
).encode('utf8')
138 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),
139 strip_leading_slash(filename
)))
140 wr('data %d' % len(d
)) # had some trouble with size()
143 if count
%cfg_export
_boundary
==0:
144 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
145 if max>cfg_export_boundary
:
146 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
148 def sanitize_name(name
,what
="branch"):
149 """Sanitize input roughly according to git-check-ref-format(1)"""
152 if name
[0] == '.': return '_'+name
[1:]
156 p
=re
.compile('([[ ~^:?\\\\*]|\.\.)')
158 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
159 n
='/'.join(map(dot
,n
.split('/')))
164 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
167 def strip_leading_slash(filename
):
168 if filename
[0] == '/':
172 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,
173 branchesmap
,sob
,brmap
,hgtags
,notes
,encoding
=''):
174 def get_branchname(name
):
175 if brmap
.has_key(name
):
177 n
=sanitize_name(branchesmap
.get(name
,name
))
181 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
183 branch
=get_branchname(branch
)
185 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
187 if len(parents
)==0 and revision
!= 0:
188 wr('reset refs/heads/%s' % branch
)
190 wr('commit refs/heads/%s' % branch
)
191 wr('mark :%d' % (revision
+1))
193 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
194 wr('committer %s %d %s' % (user
,time
,timezone
))
195 wr('data %d' % (len(desc
)+1)) # wtf?
199 ctx
=repo
.changectx(str(revision
))
201 added
,changed
,removed
,type=[],[],[],''
203 if len(parents
) == 0:
204 # first revision: feed in full manifest
209 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
210 if len(parents
) == 1:
211 # later non-merge revision: feed in changed manifest
212 # if we have exactly one parent, just take the changes from the
213 # manifest without expensively comparing checksums
214 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
215 added
,changed
,removed
=f
[1],f
[0],f
[2]
217 else: # a merge with two parents
218 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
219 # later merge revision: feed in changed manifest
220 # for many files comparing checksums is expensive so only do it for
221 # merges where we really need it due to hg's revlog logic
222 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
223 type='thorough delta'
225 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
226 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
229 removed
=[r
.decode(encoding
).encode('utf8') for r
in removed
]
231 removed
=[strip_leading_slash(x
) for x
in removed
]
233 map(lambda r
: wr('D %s' % r
),removed
)
234 export_file_contents(ctx
,man
,added
,hgtags
,encoding
)
235 export_file_contents(ctx
,man
,changed
,hgtags
,encoding
)
238 count
=checkpoint(count
)
239 count
=generate_note(user
,time
,timezone
,revision
,ctx
,count
,notes
)
242 def generate_note(user
,time
,timezone
,revision
,ctx
,count
,notes
):
245 wr('commit refs/notes/hg')
246 wr('committer %s %d %s' % (user
,time
,timezone
))
248 wr('N inline :%d' % (revision
+1))
250 wr('data %d' % (len(hg_hash
)))
253 return checkpoint(count
)
255 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
,tagsmap
):
258 # Remap the tag name
259 tag
=sanitize_name(tagsmap
.get(tag
,tag
),"tag")
260 # ignore latest revision
261 if tag
=='tip': continue
262 # ignore tags to nodes that are missing (ie, 'in the future')
263 if node
.encode('hex_codec') not in mapping_cache
:
264 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
267 rev
=int(mapping_cache
[node
.encode('hex_codec')])
269 ref
=revnum_to_revref(rev
, old_marks
)
271 sys
.stderr
.write('Failed to find reference for creating tag'
272 ' %s at r%d\n' % (tag
,rev
))
274 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
275 wr('reset refs/tags/%s' % tag
)
278 count
=checkpoint(count
)
281 def load_mapping(name
, filename
):
283 if not os
.path
.exists(filename
):
288 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
289 for line
in f
.readlines():
292 if line
=='' or line
[0]=='#':
296 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
298 # put key:value in cache, key without ^:
299 cache
[m
.group(1).strip()]=m
.group(2).strip()
302 sys
.stderr
.write('Loaded %d %s\n' % (a
, name
))
305 def branchtip(repo
, heads
):
306 '''return the tipmost branch head in heads'''
308 for h
in reversed(heads
):
309 if 'close' not in repo
.changelog
.read(h
)[5]:
314 def verify_heads(ui
,repo
,cache
,force
):
316 for bn
, heads
in repo
.branchmap().iteritems():
317 branches
[bn
] = branchtip(repo
, heads
)
318 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
321 # get list of hg's branches to verify, don't take all git has
327 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
328 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
329 if not force
: return False
331 # verify that branch has exactly one head
333 for h
in repo
.heads():
334 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
335 if t
.get(branch
,False):
336 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
337 repo
.changelog
.rev(h
))
338 if not force
: return False
343 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,
344 authors
={},branchesmap
={},tagsmap
={},
345 sob
=False,force
=False,hgtags
=False,notes
=False,encoding
=''):
348 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
349 mapping_cache
=load_cache(mappingfile
)
350 heads_cache
=load_cache(headsfile
)
351 state_cache
=load_cache(tipfile
)
353 ui
,repo
=setup_repo(repourl
)
355 if not verify_heads(ui
,repo
,heads_cache
,force
):
359 tip
=repo
.changelog
.count()
360 except AttributeError:
363 min=int(state_cache
.get('tip',0))
365 if _max
<0 or max>tip
:
368 for rev
in range(0,max):
369 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
370 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
375 for rev
in range(min,max):
376 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,branchesmap
,
377 sob
,brmap
,hgtags
,notes
,encoding
)
379 state_cache
['tip']=max
380 state_cache
['repo']=repourl
381 save_cache(tipfile
,state_cache
)
382 save_cache(mappingfile
,mapping_cache
)
384 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
,tagsmap
)
386 sys
.stderr
.write('Issued %d commands\n' % c
)
390 if __name__
=='__main__':
391 def bail(parser
,opt
):
392 sys
.stderr
.write('Error: No %s option given\n' % opt
)
396 parser
=OptionParser()
398 parser
.add_option("-m","--max",type="int",dest
="max",
399 help="Maximum hg revision to import")
400 parser
.add_option("--mapping",dest
="mappingfile",
401 help="File to read last run's hg-to-git SHA1 mapping")
402 parser
.add_option("--marks",dest
="marksfile",
403 help="File to read git-fast-import's marks from")
404 parser
.add_option("--heads",dest
="headsfile",
405 help="File to read last run's git heads from")
406 parser
.add_option("--status",dest
="statusfile",
407 help="File to read status from")
408 parser
.add_option("-r","--repo",dest
="repourl",
409 help="URL of repo to import")
410 parser
.add_option("-s",action
="store_true",dest
="sob",
411 default
=False,help="Enable parsing Signed-off-by lines")
412 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
413 default
=False,help="Enable exporting .hgtags files")
414 parser
.add_option("-A","--authors",dest
="authorfile",
415 help="Read authormap from AUTHORFILE")
416 parser
.add_option("-B","--branches",dest
="branchesfile",
417 help="Read branch map from BRANCHESFILE")
418 parser
.add_option("-T","--tags",dest
="tagsfile",
419 help="Read tags map from TAGSFILE")
420 parser
.add_option("-f","--force",action
="store_true",dest
="force",
421 default
=False,help="Ignore validation errors by force")
422 parser
.add_option("-M","--default-branch",dest
="default_branch",
423 help="Set the default branch")
424 parser
.add_option("-o","--origin",dest
="origin_name",
425 help="use <name> as namespace to track upstream")
426 parser
.add_option("--hg-hash",action
="store_true",dest
="notes",
427 default
=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
428 parser
.add_option("-e",dest
="encoding",
429 help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
431 (options
,args
)=parser
.parse_args()
434 if options
.max!=None: m
=options
.max
436 if options
.marksfile
==None: bail(parser
,'--marks')
437 if options
.mappingfile
==None: bail(parser
,'--mapping')
438 if options
.headsfile
==None: bail(parser
,'--heads')
439 if options
.statusfile
==None: bail(parser
,'--status')
440 if options
.repourl
==None: bail(parser
,'--repo')
443 if options
.authorfile
!=None:
444 a
=load_mapping('authors', options
.authorfile
)
447 if options
.branchesfile
!=None:
448 b
=load_mapping('branches', options
.branchesfile
)
451 if options
.tagsfile
!=None:
452 t
=load_mapping('tags', options
.tagsfile
)
454 if options
.default_branch
!=None:
455 set_default_branch(options
.default_branch
)
457 if options
.origin_name
!=None:
458 set_origin_name(options
.origin_name
)
461 if options
.encoding
!=None:
462 encoding
=options
.encoding
464 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,
465 options
.headsfile
, options
.statusfile
,
466 authors
=a
,branchesmap
=b
,tagsmap
=t
,
467 sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
,
468 notes
=options
.notes
,encoding
=encoding
))