# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
8 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
9 from optparse
import OptionParser
14 if sys
.platform
== "win32":
15 # On Windows, sys.stdout is initially opened in text mode, which means that
16 # when a LF (\n) character is written to sys.stdout, it will be converted
17 # into CRLF (\r\n). That makes git blow up, so use this platform-specific
18 # code to change the mode of sys.stdout to binary.
20 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
# silly regex to catch Signed-off-by lines in log message; group 1 is
# everything after the "Signed-off-by: " prefix up to the end of the line
sob_re = re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
cfg_checkpoint_count = 0
# write some progress message every this many file contents written
cfg_export_boundary = 1000
30 return 'l' in flags
and '120000' or 'x' in flags
and '100755' or '100644'
38 sys
.stdout
.write('\n')
39 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
41 def checkpoint(count
):
43 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
44 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Convert an hg revnum to a git-fast-import rev reference.

    rev is the numeric hg revision and old_marks a mapping that is looked
    up with that number.  If the lookup yields a truthy value it is
    returned unchanged; otherwise the result is the mark string
    ':<rev+1>' (marks are one-based while hg revnums start at zero).
    """
    known = old_marks.get(rev)
    # A missing entry and a falsy entry are treated alike: fall back to
    # the mark that corresponds to this revision.
    return known or ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """See if two revisions of a file are not equal.

    Both arguments are converted with node.hex() and the resulting
    strings compared; returns True when they differ.
    """
    return node.hex(f1) != node.hex(f2)
def split_dict(dleft, dright, l=None, c=None, r=None, match=file_mismatch):
    """Loop over our repository and find all changed and missing files.

    dleft is our own manifest and dright the parent's; both are used as
    mappings (keys(), get(), item access) that also offer flags(name).
    Names are appended to three lists which are returned as a tuple:

      l -- names only present in dleft (left set)
      c -- names present in both for which match() reports a difference
           or whose gitmode() of the flags differs (center set)
      r -- names only present in dright (right set)

    Callers may pass existing lists for l, c and r to accumulate results
    over several calls; when omitted, fresh lists are created per call.
    """
    # List literals as defaults would be created once and shared by every
    # call that relies on them, so results would leak between calls.
    if l is None:
        l = []
    if c is None:
        c = []
    if r is None:
        r = []
    for left in dleft.keys():
        right = dright.get(left, None)
        if right is None:
            # we have the file but our parent hasn't: add to left set
            l.append(left)
        elif (match(dleft[left], right)
              or gitmode(dleft.flags(left)) != gitmode(dright.flags(left))):
            # we have it but checksums mismatch: add to center set
            c.append(left)
    for right in dright.keys():
        left = dleft.get(right, None)
        if left is None:
            # if parent has file but we don't: add to right set
            r.append(right)
        # change is already handled when comparing child against parent
    return l, c, r
76 def get_filechanges(repo
,revision
,parents
,mleft
):
77 """Given some repository and revision, find all changed/deleted files."""
81 mright
=repo
.changectx(p
).manifest()
82 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
88 def get_author(logmessage
,committer
,authors
):
89 """As git distincts between author and committer of a patch, try to
90 extract author by detecting Signed-off-by lines.
92 This walks from the end of the log message towards the top skipping
93 empty lines. Upon the first non-empty line, it walks all Signed-off-by
94 lines upwards to find the first one. For that (if found), it extracts
95 authorship information the usual way (authors table, cleaning, etc.)
97 If no Signed-off-by line is found, this defaults to the committer.
99 This may sound stupid (and it somehow is), but in log messages we
100 accidentially may have lines in the middle starting with
101 "Signed-off-by: foo" and thus matching our detection regex. Prevent
104 loglines
=logmessage
.split('\n')
106 # from tail walk to top skipping empty lines
109 if len(loglines
[i
].strip())==0: continue
112 # walk further upwards to find first sob line, store in 'first'
115 m
=sob_re
.match(loglines
[i
])
119 # if the last non-empty line matches our Signed-Off-by regex: extract username
121 r
=fixup_user(first
.group(1),authors
)
125 def export_file_contents(ctx
,manifest
,files
,hgtags
,encoding
=''):
129 # Skip .hgtags files. They only get us in trouble.
130 if not hgtags
and file == ".hgtags":
131 sys
.stderr
.write('Skip %s\n' % (file))
133 d
=ctx
.filectx(file).data()
135 filename
=file.decode(encoding
).encode('utf8')
138 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),
139 strip_leading_slash(filename
)))
140 wr('data %d' % len(d
)) # had some trouble with size()
143 if count
%cfg_export
_boundary
==0:
144 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
145 if max>cfg_export_boundary
:
146 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
148 def sanitize_name(name
,what
="branch", mapping
={}):
149 """Sanitize input roughly according to git-check-ref-format(1)"""
151 # NOTE: Do not update this transform to work around
152 # incompatibilities on your platform. If you change it and it starts
153 # modifying names which previously were not touched it will break
154 # preexisting setups which are doing incremental imports.
156 # Use the -B and -T options to mangle branch and tag names
157 # instead. If you have a source repository where this is too much
158 # work to do manually, write a tool that does it for you.
161 if name
[0] == '.': return '_'+name
[1:]
164 n
=mapping
.get(name
,name
)
165 p
=re
.compile('([[ ~^:?\\\\*]|\.\.)')
167 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
168 n
='/'.join(map(dot
,n
.split('/')))
173 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
def strip_leading_slash(filename):
    """Return filename with one leading '/' removed, if it has one.

    Names that do not start with '/' (including the empty string) are
    returned unchanged.
    """
    # Slice instead of indexing so an empty name does not raise IndexError.
    if filename[:1] == '/':
        return filename[1:]
    return filename
181 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,
182 branchesmap
,sob
,brmap
,hgtags
,encoding
='',fn_encoding
=''):
183 def get_branchname(name
):
184 if brmap
.has_key(name
):
186 n
=sanitize_name(name
, "branch", branchesmap
)
190 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
192 branch
=get_branchname(branch
)
194 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
196 if len(parents
)==0 and revision
!= 0:
197 wr('reset refs/heads/%s' % branch
)
199 wr('commit refs/heads/%s' % branch
)
200 wr('mark :%d' % (revision
+1))
202 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
203 wr('committer %s %d %s' % (user
,time
,timezone
))
204 wr('data %d' % (len(desc
)+1)) # wtf?
208 ctx
=repo
.changectx(str(revision
))
210 added
,changed
,removed
,type=[],[],[],''
212 if len(parents
) == 0:
213 # first revision: feed in full manifest
218 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
219 if len(parents
) == 1:
220 # later non-merge revision: feed in changed manifest
221 # if we have exactly one parent, just take the changes from the
222 # manifest without expensively comparing checksums
223 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
224 added
,changed
,removed
=f
[1],f
[0],f
[2]
226 else: # a merge with two parents
227 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
228 # later merge revision: feed in changed manifest
229 # for many files comparing checksums is expensive so only do it for
230 # merges where we really need it due to hg's revlog logic
231 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
232 type='thorough delta'
234 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
235 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
238 removed
=[r
.decode(fn_encoding
).encode('utf8') for r
in removed
]
240 removed
=[strip_leading_slash(x
) for x
in removed
]
242 map(lambda r
: wr('D %s' % r
),removed
)
243 export_file_contents(ctx
,man
,added
,hgtags
,fn_encoding
)
244 export_file_contents(ctx
,man
,changed
,hgtags
,fn_encoding
)
247 return checkpoint(count
)
249 def export_note(ui
,repo
,revision
,count
,authors
,encoding
,is_first
):
250 (revnode
,_
,user
,(time
,timezone
),_
,_
,_
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
252 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
254 wr('commit refs/notes/hg')
255 wr('committer %s %d %s' % (user
,time
,timezone
))
258 wr('from refs/notes/hg^0')
259 wr('N inline :%d' % (revision
+1))
260 hg_hash
=repo
.changectx(str(revision
)).hex()
261 wr('data %d' % (len(hg_hash
)))
264 return checkpoint(count
)
266 wr('data %d' % (len(desc
)+1)) # wtf?
270 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
,tagsmap
):
273 # Remap the branch name
274 tag
=sanitize_name(tag
,"tag",tagsmap
)
275 # ignore latest revision
276 if tag
=='tip': continue
277 # ignore tags to nodes that are missing (ie, 'in the future')
278 if node
.encode('hex_codec') not in mapping_cache
:
279 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
282 rev
=int(mapping_cache
[node
.encode('hex_codec')])
284 ref
=revnum_to_revref(rev
, old_marks
)
286 sys
.stderr
.write('Failed to find reference for creating tag'
287 ' %s at r%d\n' % (tag
,rev
))
289 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
290 wr('reset refs/tags/%s' % tag
)
293 count
=checkpoint(count
)
296 def load_mapping(name
, filename
):
298 if not os
.path
.exists(filename
):
299 sys
.stderr
.write('Could not open mapping file [%s]\n' % (filename
))
304 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
305 for line
in f
.readlines():
308 if line
=='' or line
[0]=='#':
312 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
314 # put key:value in cache, key without ^:
315 cache
[m
.group(1).strip()]=m
.group(2).strip()
318 sys
.stderr
.write('Loaded %d %s\n' % (a
, name
))
321 def branchtip(repo
, heads
):
322 '''return the tipmost branch head in heads'''
324 for h
in reversed(heads
):
325 if 'close' not in repo
.changelog
.read(h
)[5]:
330 def verify_heads(ui
,repo
,cache
,force
,branchesmap
):
332 for bn
, heads
in repo
.branchmap().iteritems():
333 branches
[bn
] = branchtip(repo
, heads
)
334 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
337 # get list of hg's branches to verify, don't take all git has
340 sha1
=get_git_sha1(sanitize_name(b
,"branch",branchesmap
))
343 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
344 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
345 if not force
: return False
347 # verify that branch has exactly one head
349 for h
in repo
.heads():
350 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
351 if t
.get(branch
,False):
352 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
353 repo
.changelog
.rev(h
))
354 if not force
: return False
359 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,
360 authors
={},branchesmap
={},tagsmap
={},
361 sob
=False,force
=False,hgtags
=False,notes
=False,encoding
='',fn_encoding
=''):
364 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
365 mapping_cache
=load_cache(mappingfile
)
366 heads_cache
=load_cache(headsfile
)
367 state_cache
=load_cache(tipfile
)
369 ui
,repo
=setup_repo(repourl
)
371 if not verify_heads(ui
,repo
,heads_cache
,force
,branchesmap
):
375 tip
=repo
.changelog
.count()
376 except AttributeError:
379 min=int(state_cache
.get('tip',0))
381 if _max
<0 or max>tip
:
384 for rev
in range(0,max):
385 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
386 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
391 for rev
in range(min,max):
392 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,branchesmap
,
393 sob
,brmap
,hgtags
,encoding
,fn_encoding
)
395 for rev
in range(min,max):
396 c
=export_note(ui
,repo
,rev
,c
,authors
, encoding
, rev
== min and min != 0)
398 state_cache
['tip']=max
399 state_cache
['repo']=repourl
400 save_cache(tipfile
,state_cache
)
401 save_cache(mappingfile
,mapping_cache
)
403 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
,tagsmap
)
405 sys
.stderr
.write('Issued %d commands\n' % c
)
409 if __name__
=='__main__':
410 def bail(parser
,opt
):
411 sys
.stderr
.write('Error: No %s option given\n' % opt
)
415 parser
=OptionParser()
417 parser
.add_option("-m","--max",type="int",dest
="max",
418 help="Maximum hg revision to import")
419 parser
.add_option("--mapping",dest
="mappingfile",
420 help="File to read last run's hg-to-git SHA1 mapping")
421 parser
.add_option("--marks",dest
="marksfile",
422 help="File to read git-fast-import's marks from")
423 parser
.add_option("--heads",dest
="headsfile",
424 help="File to read last run's git heads from")
425 parser
.add_option("--status",dest
="statusfile",
426 help="File to read status from")
427 parser
.add_option("-r","--repo",dest
="repourl",
428 help="URL of repo to import")
429 parser
.add_option("-s",action
="store_true",dest
="sob",
430 default
=False,help="Enable parsing Signed-off-by lines")
431 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
432 default
=False,help="Enable exporting .hgtags files")
433 parser
.add_option("-A","--authors",dest
="authorfile",
434 help="Read authormap from AUTHORFILE")
435 parser
.add_option("-B","--branches",dest
="branchesfile",
436 help="Read branch map from BRANCHESFILE")
437 parser
.add_option("-T","--tags",dest
="tagsfile",
438 help="Read tags map from TAGSFILE")
439 parser
.add_option("-f","--force",action
="store_true",dest
="force",
440 default
=False,help="Ignore validation errors by force")
441 parser
.add_option("-M","--default-branch",dest
="default_branch",
442 help="Set the default branch")
443 parser
.add_option("-o","--origin",dest
="origin_name",
444 help="use <name> as namespace to track upstream")
445 parser
.add_option("--hg-hash",action
="store_true",dest
="notes",
446 default
=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
447 parser
.add_option("-e",dest
="encoding",
448 help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
449 parser
.add_option("--fe",dest
="fn_encoding",
450 help="Assume file names from Mercurial are encoded in <filename_encoding>")
452 (options
,args
)=parser
.parse_args()
455 if options
.max!=None: m
=options
.max
457 if options
.marksfile
==None: bail(parser
,'--marks')
458 if options
.mappingfile
==None: bail(parser
,'--mapping')
459 if options
.headsfile
==None: bail(parser
,'--heads')
460 if options
.statusfile
==None: bail(parser
,'--status')
461 if options
.repourl
==None: bail(parser
,'--repo')
464 if options
.authorfile
!=None:
465 a
=load_mapping('authors', options
.authorfile
)
468 if options
.branchesfile
!=None:
469 b
=load_mapping('branches', options
.branchesfile
)
472 if options
.tagsfile
!=None:
473 t
=load_mapping('tags', options
.tagsfile
)
475 if options
.default_branch
!=None:
476 set_default_branch(options
.default_branch
)
478 if options
.origin_name
!=None:
479 set_origin_name(options
.origin_name
)
482 if options
.encoding
!=None:
483 encoding
=options
.encoding
486 if options
.fn_encoding
!=None:
487 fn_encoding
=options
.fn_encoding
489 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,
490 options
.headsfile
, options
.statusfile
,
491 authors
=a
,branchesmap
=b
,tagsmap
=t
,
492 sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
,
493 notes
=options
.notes
,encoding
=encoding
,fn_encoding
=fn_encoding
))