3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
8 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
9 from optparse
import OptionParser
if sys.platform == "win32":
    # On Windows, sys.stdout is initially opened in text mode, which means that
    # when a LF (\n) character is written to sys.stdout, it will be converted
    # into CRLF (\r\n). That makes git blow up, so use this platform-specific
    # code to change the mode of sys.stdout to binary.
    # msvcrt only exists on Windows, so it is imported inside the guard.
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
# Silly regex to catch Signed-off-by lines in a log message. Group 1 captures
# everything that follows the "Signed-off-by: " prefix on that line.
sob_re = re.compile(r'^Signed-[Oo]ff-[Bb]y: (.+)$')
# Insert a 'checkpoint' command after this many commits, or none at all if 0.
cfg_checkpoint_count = 0
# Write some progress message every this many file contents written.
cfg_export_boundary = 1000
30 return 'l' in flags
and '120000' or 'x' in flags
and '100755' or '100644'
38 sys
.stdout
.write('\n')
39 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
41 def checkpoint(count
):
43 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
44 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Convert an hg revnum to a git-fast-import rev reference (an SHA1
    or a mark).

    rev is the integer hg revision number and old_marks is a mapping that
    is looked up by that number.

    Returns the entry stored in old_marks for rev when one exists and is
    truthy; otherwise returns the mark string ':<rev+1>'.
    """
    known_ref = old_marks.get(rev)
    if known_ref:
        return known_ref
    # No usable entry for this revision: refer to it by mark, numbered rev+1.
    return ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """See if two revisions of a file are not equal.

    Both arguments are converted with node.hex() and the resulting hex
    forms are compared. Returns True when they differ, False otherwise.
    """
    return node.hex(f1) != node.hex(f2)
58 def split_dict(dleft
,dright
,l
=[],c
=[],r
=[],match
=file_mismatch
):
59 """Loop over our repository and find all changed and missing files."""
60 for left
in dleft
.keys():
61 right
=dright
.get(left
,None)
63 # we have the file but our parent hasn't: add to left set
65 elif match(dleft
[left
],right
) or gitmode(dleft
.flags(left
))!=gitmode(dright
.flags(left
)):
66 # we have it but checksums mismatch: add to center set
68 for right
in dright
.keys():
69 left
=dleft
.get(right
,None)
71 # if parent has file but we don't: add to right set
73 # change is already handled when comparing child against parent
76 def get_filechanges(repo
,revision
,parents
,mleft
):
77 """Given some repository and revision, find all changed/deleted files."""
81 mright
=repo
.changectx(p
).manifest()
82 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
88 def get_author(logmessage
,committer
,authors
):
89 """As git distinguishes between author and committer of a patch, try to
90 extract author by detecting Signed-off-by lines.
92 This walks from the end of the log message towards the top skipping
93 empty lines. Upon the first non-empty line, it walks all Signed-off-by
94 lines upwards to find the first one. For that (if found), it extracts
95 authorship information the usual way (authors table, cleaning, etc.)
97 If no Signed-off-by line is found, this defaults to the committer.
99 This may sound stupid (and it somehow is), but in log messages we
100 accidentally may have lines in the middle starting with
101 "Signed-off-by: foo" and thus matching our detection regex. Prevent
104 loglines
=logmessage
.split('\n')
106 # from tail walk to top skipping empty lines
109 if len(loglines
[i
].strip())==0: continue
112 # walk further upwards to find first sob line, store in 'first'
115 m
=sob_re
.match(loglines
[i
])
119 # if the last non-empty line matches our Signed-Off-by regex: extract username
121 r
=fixup_user(first
.group(1),authors
)
125 def export_file_contents(ctx
,manifest
,files
,hgtags
,encoding
=''):
129 # Skip .hgtags files. They only get us in trouble.
130 if not hgtags
and file == ".hgtags":
131 sys
.stderr
.write('Skip %s\n' % (file))
133 d
=ctx
.filectx(file).data()
135 filename
=file.decode(encoding
).encode('utf8')
138 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),
139 strip_leading_slash(filename
)))
140 wr('data %d' % len(d
)) # had some trouble with size()
143 if count
%cfg_export
_boundary
==0:
144 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
145 if max>cfg_export_boundary
:
146 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
148 def sanitize_name(name
,what
="branch"):
149 """Sanitize input roughly according to git-check-ref-format(1)"""
152 if name
[0] == '.': return '_'+name
[1:]
156 p
=re
.compile('([[ ~^:?\\\\*]|\.\.)')
158 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
159 n
='/'.join(map(dot
,n
.split('/')))
164 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
167 def strip_leading_slash(filename
):
168 if filename
[0] == '/':
172 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,sob
,brmap
,hgtags
,notes
,encoding
=''):
173 def get_branchname(name
):
174 if brmap
.has_key(name
):
176 n
=sanitize_name(name
)
180 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
182 branch
=get_branchname(branch
)
184 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
186 if len(parents
)==0 and revision
!= 0:
187 wr('reset refs/heads/%s' % branch
)
189 wr('commit refs/heads/%s' % branch
)
190 wr('mark :%d' % (revision
+1))
192 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
193 wr('committer %s %d %s' % (user
,time
,timezone
))
194 wr('data %d' % (len(desc
)+1)) # wtf?
198 ctx
=repo
.changectx(str(revision
))
200 added
,changed
,removed
,type=[],[],[],''
202 if len(parents
) == 0:
203 # first revision: feed in full manifest
208 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
209 if len(parents
) == 1:
210 # later non-merge revision: feed in changed manifest
211 # if we have exactly one parent, just take the changes from the
212 # manifest without expensively comparing checksums
213 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
214 added
,changed
,removed
=f
[1],f
[0],f
[2]
216 else: # a merge with two parents
217 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
218 # later merge revision: feed in changed manifest
219 # for many files comparing checksums is expensive so only do it for
220 # merges where we really need it due to hg's revlog logic
221 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
222 type='thorough delta'
224 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
225 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
228 removed
=[r
.decode(encoding
).encode('utf8') for r
in removed
]
230 removed
=[strip_leading_slash(x
) for x
in removed
]
232 map(lambda r
: wr('D %s' % r
),removed
)
233 export_file_contents(ctx
,man
,added
,hgtags
,encoding
)
234 export_file_contents(ctx
,man
,changed
,hgtags
,encoding
)
237 count
=checkpoint(count
)
238 count
=generate_note(user
,time
,timezone
,revision
,ctx
,count
,notes
)
241 def generate_note(user
,time
,timezone
,revision
,ctx
,count
,notes
):
244 wr('commit refs/notes/hg')
245 wr('committer %s %d %s' % (user
,time
,timezone
))
247 wr('N inline :%d' % (revision
+1))
249 wr('data %d' % (len(hg_hash
)))
252 return checkpoint(count
)
254 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
):
257 tag
=sanitize_name(tag
,"tag")
258 # ignore latest revision
259 if tag
=='tip': continue
260 # ignore tags to nodes that are missing (ie, 'in the future')
261 if node
.encode('hex_codec') not in mapping_cache
:
262 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
265 rev
=int(mapping_cache
[node
.encode('hex_codec')])
267 ref
=revnum_to_revref(rev
, old_marks
)
269 sys
.stderr
.write('Failed to find reference for creating tag'
270 ' %s at r%d\n' % (tag
,rev
))
272 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
273 wr('reset refs/tags/%s' % tag
)
276 count
=checkpoint(count
)
279 def load_mapping(name
, filename
):
281 if not os
.path
.exists(filename
):
286 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
287 for line
in f
.readlines():
290 if line
=='' or line
[0]=='#':
294 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
296 # put key:value in cache, key without ^:
297 cache
[m
.group(1).strip()]=m
.group(2).strip()
300 sys
.stderr
.write('Loaded %d %s\n' % (a
, name
))
303 def branchtip(repo
, heads
):
304 '''return the tipmost branch head in heads'''
306 for h
in reversed(heads
):
307 if 'close' not in repo
.changelog
.read(h
)[5]:
312 def verify_heads(ui
,repo
,cache
,force
):
314 for bn
, heads
in repo
.branchmap().iteritems():
315 branches
[bn
] = branchtip(repo
, heads
)
316 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
319 # get list of hg's branches to verify, don't take all git has
325 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
326 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
327 if not force
: return False
329 # verify that branch has exactly one head
331 for h
in repo
.heads():
332 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
333 if t
.get(branch
,False):
334 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
335 repo
.changelog
.rev(h
))
336 if not force
: return False
341 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,authors
={},sob
=False,force
=False,hgtags
=False,notes
=False,encoding
=''):
344 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
345 mapping_cache
=load_cache(mappingfile
)
346 heads_cache
=load_cache(headsfile
)
347 state_cache
=load_cache(tipfile
)
349 ui
,repo
=setup_repo(repourl
)
351 if not verify_heads(ui
,repo
,heads_cache
,force
):
355 tip
=repo
.changelog
.count()
356 except AttributeError:
359 min=int(state_cache
.get('tip',0))
361 if _max
<0 or max>tip
:
364 for rev
in range(0,max):
365 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
366 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
371 for rev
in range(min,max):
372 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,sob
,brmap
,hgtags
,notes
,encoding
)
374 state_cache
['tip']=max
375 state_cache
['repo']=repourl
376 save_cache(tipfile
,state_cache
)
377 save_cache(mappingfile
,mapping_cache
)
379 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
)
381 sys
.stderr
.write('Issued %d commands\n' % c
)
385 if __name__
=='__main__':
386 def bail(parser
,opt
):
387 sys
.stderr
.write('Error: No %s option given\n' % opt
)
391 parser
=OptionParser()
393 parser
.add_option("-m","--max",type="int",dest
="max",
394 help="Maximum hg revision to import")
395 parser
.add_option("--mapping",dest
="mappingfile",
396 help="File to read last run's hg-to-git SHA1 mapping")
397 parser
.add_option("--marks",dest
="marksfile",
398 help="File to read git-fast-import's marks from")
399 parser
.add_option("--heads",dest
="headsfile",
400 help="File to read last run's git heads from")
401 parser
.add_option("--status",dest
="statusfile",
402 help="File to read status from")
403 parser
.add_option("-r","--repo",dest
="repourl",
404 help="URL of repo to import")
405 parser
.add_option("-s",action
="store_true",dest
="sob",
406 default
=False,help="Enable parsing Signed-off-by lines")
407 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
408 default
=False,help="Enable exporting .hgtags files")
409 parser
.add_option("-A","--authors",dest
="authorfile",
410 help="Read authormap from AUTHORFILE")
411 parser
.add_option("-f","--force",action
="store_true",dest
="force",
412 default
=False,help="Ignore validation errors by force")
413 parser
.add_option("-M","--default-branch",dest
="default_branch",
414 help="Set the default branch")
415 parser
.add_option("-o","--origin",dest
="origin_name",
416 help="use <name> as namespace to track upstream")
417 parser
.add_option("--hg-hash",action
="store_true",dest
="notes",
418 default
=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
419 parser
.add_option("-e",dest
="encoding",
420 help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
422 (options
,args
)=parser
.parse_args()
425 if options
.max!=None: m
=options
.max
427 if options
.marksfile
==None: bail(parser
,'--marks')
428 if options
.mappingfile
==None: bail(parser
,'--mapping')
429 if options
.headsfile
==None: bail(parser
,'--heads')
430 if options
.statusfile
==None: bail(parser
,'--status')
431 if options
.repourl
==None: bail(parser
,'--repo')
434 if options
.authorfile
!=None:
435 a
=load_mapping('authors', options
.authorfile
)
437 if options
.default_branch
!=None:
438 set_default_branch(options
.default_branch
)
440 if options
.origin_name
!=None:
441 set_origin_name(options
.origin_name
)
444 if options
.encoding
!=None:
445 encoding
=options
.encoding
447 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,
448 options
.headsfile
, options
.statusfile
,authors
=a
,
449 sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
,
450 notes
=options
.notes
,encoding
=encoding
))