3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
8 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
,set_unknown_addr
9 from optparse
import OptionParser
if sys.platform == "win32":
    # On Windows, sys.stdout is initially opened in text mode, which means that
    # when a LF (\n) character is written to sys.stdout, it will be converted
    # into CRLF (\r\n). That makes git blow up, so use this platform-specific
    # code to change the mode of sys.stdout to binary.
    # msvcrt only exists on Windows, so it is imported inside the guard.
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
# silly regex to catch Signed-off-by lines in log message
sob_re = re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
cfg_checkpoint_count = 0
# write some progress message every this many file contents written
cfg_export_boundary = 1000
# ref manipulation regexes
# Runs of characters that get replaced in ref names: '[', control characters
# (\x00-\x1f, \x7f), space, '~', '^', ':', backslash, '*' and '?'.
# The leading '[' is escaped so newer Python versions do not warn about a
# possible nested set; the set of matched characters is unchanged.
ref_crud_re = re.compile(r'[\[\x00-\x1f\x7f ~^:\\*?]+', re.S)
# a literal '..'
ref_dotdot_re = re.compile(r'\.\.')
# a literal '@{'
ref_atbrace_re = re.compile(r'@\{')
# anything ending in '.lock' (case-insensitive)
ref_dotlock_re = re.compile(r'.*\.lock$', re.I)
# one or more consecutive '/'
ref_separators_re = re.compile(r'/+')
# one or more consecutive '_'
ref_collapse_re = re.compile(r'_+')
def gitmode(flags):
    """Return the git file mode string for the given flags.

    flags is whatever the callers obtain from ``<manifest>.flags(path)``;
    only membership tests ('l' in flags, 'x' in flags) are performed on it.

    Returns '120000' when 'l' is present, otherwise '100755' when 'x' is
    present, otherwise '100644'.
    """
    # 'l' takes precedence over 'x', matching the original and/or chain.
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
45 sys
.stdout
.write('\n')
46 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
48 def checkpoint(count
):
50 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
51 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Convert an hg revnum to a git-fast-import rev reference (an SHA1
    or a mark).

    rev is the integer hg revision number; old_marks is a mapping looked up
    with ``old_marks.get(rev)``.

    Returns the stored entry for rev when it is present and truthy,
    otherwise the mark string ':<rev+1>'.
    """
    # Marks are offset by one from the revision number (rev 0 -> ':1').
    return old_marks.get(rev) or ':%d' % (rev+1)
def file_mismatch(f1,f2):
    """See if two revisions of a file are not equal.

    Both arguments are passed through ``node.hex`` and the resulting values
    are compared; returns True when they differ, False otherwise.
    """
    return node.hex(f1) != node.hex(f2)
65 def split_dict(dleft
,dright
,l
=[],c
=[],r
=[],match
=file_mismatch
):
66 """Loop over our repository and find all changed and missing files."""
67 for left
in dleft
.keys():
68 right
=dright
.get(left
,None)
70 # we have the file but our parent hasn't: add to left set
72 elif match(dleft
[left
],right
) or gitmode(dleft
.flags(left
))!=gitmode(dright
.flags(left
)):
73 # we have it but checksums mismatch: add to center set
75 for right
in dright
.keys():
76 left
=dleft
.get(right
,None)
78 # if parent has file but we don't: add to right set
80 # change is already handled when comparing child against parent
83 def get_filechanges(repo
,revision
,parents
,mleft
):
84 """Given some repository and revision, find all changed/deleted files."""
88 mright
=repo
.changectx(p
).manifest()
89 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
95 def get_author(logmessage
,committer
,authors
):
96 """As git distinguishes between author and committer of a patch, try to
97 extract author by detecting Signed-off-by lines.
99 This walks from the end of the log message towards the top skipping
100 empty lines. Upon the first non-empty line, it walks all Signed-off-by
101 lines upwards to find the first one. For that (if found), it extracts
102 authorship information the usual way (authors table, cleaning, etc.)
104 If no Signed-off-by line is found, this defaults to the committer.
106 This may sound stupid (and it somehow is), but in log messages we
107 accidentally may have lines in the middle starting with
108 "Signed-off-by: foo" and thus matching our detection regex. Prevent
111 loglines
=logmessage
.split('\n')
113 # from tail walk to top skipping empty lines
116 if len(loglines
[i
].strip())==0: continue
119 # walk further upwards to find first sob line, store in 'first'
122 m
=sob_re
.match(loglines
[i
])
126 # if the last non-empty line matches our Signed-Off-by regex: extract username
128 r
=fixup_user(first
.group(1),authors
)
132 def export_file_contents(ctx
,manifest
,files
,hgtags
):
136 # Skip .hgtags files. They only get us in trouble.
137 if not hgtags
and file == ".hgtags":
138 sys
.stderr
.write('Skip %s\n' % (file))
140 d
=ctx
.filectx(file).data()
141 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),
142 strip_leading_slash(file)))
143 wr('data %d' % len(d
)) # had some trouble with size()
146 if count
%cfg_export
_boundary
==0:
147 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
148 if max>cfg_export_boundary
:
149 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
151 def sanitize_name(name
,what
="branch",flatten
=False):
152 """Sanitize input roughly according to git-check-ref-format(1)"""
155 if len(name
) >= 1 and name
[0] == '.': return '_'+name
[1:]
159 # be paranoid just in case
163 n
= ref_crud_re
.sub('_', n
)
164 n
= ref_dotdot_re
.sub('_', n
)
165 n
= ref_atbrace_re
.sub('_{', n
)
166 if ref_dotlock_re
.match(n
):
167 n
= n
[:-5] + '_' + n
[-4:]
168 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
170 if n
[0] == '.': n
='_'+n
[1:]
171 n
= ref_separators_re
.sub('_', n
)
173 n
= '/'.join(map(dot
, n
.split('/')))
174 if n
[0] == '/': n
='_'+n
[1:]
175 n
= ref_separators_re
.sub('/', n
)
176 n
= ref_collapse_re
.sub('_', n
)
179 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
182 def strip_leading_slash(filename
):
183 if filename
[0] == '/':
187 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,sob
,brmap
,hgtags
,flatten
,notes
):
188 def get_branchname(name
):
189 if brmap
.has_key(name
):
191 n
=sanitize_name(name
,flatten
=flatten
)
195 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
)
197 branch
=get_branchname(branch
)
199 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
201 if len(parents
)==0 and revision
!= 0:
202 wr('reset refs/heads/%s' % branch
)
204 wr('commit refs/heads/%s' % branch
)
205 wr('mark :%d' % (revision
+1))
207 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
208 wr('committer %s %d %s' % (user
,time
,timezone
))
209 wr('data %d' % (len(desc
)+1)) # wtf?
213 ctx
=repo
.changectx(str(revision
))
215 added
,changed
,removed
,type=[],[],[],''
217 if len(parents
) == 0:
218 # first revision: feed in full manifest
223 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
224 if len(parents
) == 1:
225 # later non-merge revision: feed in changed manifest
226 # if we have exactly one parent, just take the changes from the
227 # manifest without expensively comparing checksums
228 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
229 added
,changed
,removed
=f
[1],f
[0],f
[2]
231 else: # a merge with two parents
232 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
233 # later merge revision: feed in changed manifest
234 # for many files comparing checksums is expensive so only do it for
235 # merges where we really need it due to hg's revlog logic
236 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
237 type='thorough delta'
239 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
240 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
242 removed
=[strip_leading_slash(x
) for x
in removed
]
244 map(lambda r
: wr('D %s' % r
),removed
)
245 export_file_contents(ctx
,man
,added
,hgtags
)
246 export_file_contents(ctx
,man
,changed
,hgtags
)
249 count
=checkpoint(count
)
250 count
=generate_note(user
,time
,timezone
,revision
,ctx
,count
,notes
)
253 def generate_note(user
,time
,timezone
,revision
,ctx
,count
,notes
):
256 wr('commit refs/notes/hg')
257 wr('committer %s %d %s' % (user
,time
,timezone
))
259 wr('N inline :%d' % (revision
+1))
261 wr('data %d' % (len(hg_hash
)))
264 return checkpoint(count
)
266 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
,flatten
):
269 tag
=sanitize_name(tag
,"tag",flatten
=flatten
)
270 # ignore latest revision
271 if tag
=='tip': continue
272 # ignore tags to nodes that are missing (ie, 'in the future')
273 if node
.encode('hex_codec') not in mapping_cache
:
274 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
277 rev
=int(mapping_cache
[node
.encode('hex_codec')])
279 ref
=revnum_to_revref(rev
, old_marks
)
281 sys
.stderr
.write('Failed to find reference for creating tag'
282 ' %s at r%d\n' % (tag
,rev
))
284 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
285 wr('reset refs/tags/%s' % tag
)
288 count
=checkpoint(count
)
291 def load_authors(filename
):
293 if not os
.path
.exists(filename
):
298 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
299 for line
in f
.readlines():
302 if line
=='' or line
[0]=='#':
306 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
308 # put key:value in cache, key without ^:
309 cache
[m
.group(1).strip()]=m
.group(2).strip()
312 sys
.stderr
.write('Loaded %d authors\n' % a
)
315 def branchtip(repo
, heads
):
316 '''return the tipmost branch head in heads'''
318 for h
in reversed(heads
):
319 if 'close' not in repo
.changelog
.read(h
)[5]:
324 def verify_heads(ui
,repo
,cache
,force
):
326 for bn
, heads
in repo
.branchmap().iteritems():
327 branches
[bn
] = branchtip(repo
, heads
)
328 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
331 # get list of hg's branches to verify, don't take all git has
337 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
338 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
339 if not force
: return False
341 # verify that branch has exactly one head
343 for h
in repo
.heads():
344 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
345 if t
.get(branch
,False):
346 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
347 repo
.changelog
.rev(h
))
348 if not force
: return False
353 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,authors
={},sob
=False,force
=False,hgtags
=False,flatten
=False,notes
=False):
356 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
357 mapping_cache
=load_cache(mappingfile
)
358 heads_cache
=load_cache(headsfile
)
359 state_cache
=load_cache(tipfile
)
361 ui
,repo
=setup_repo(repourl
)
363 if not verify_heads(ui
,repo
,heads_cache
,force
):
367 tip
=repo
.changelog
.count()
368 except AttributeError:
371 min=int(state_cache
.get('tip',0))
373 if _max
<0 or max>tip
:
376 for rev
in range(0,max):
377 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
378 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
383 for rev
in range(min,max):
384 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,sob
,brmap
,hgtags
,flatten
,notes
)
386 state_cache
['tip']=max
387 state_cache
['repo']=repourl
388 save_cache(tipfile
,state_cache
)
389 save_cache(mappingfile
,mapping_cache
)
391 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
,flatten
)
393 sys
.stderr
.write('Issued %d commands\n' % c
)
397 if __name__
=='__main__':
398 def bail(parser
,opt
):
399 sys
.stderr
.write('Error: No %s option given\n' % opt
)
403 parser
=OptionParser()
405 parser
.add_option("-m","--max",type="int",dest
="max",
406 help="Maximum hg revision to import")
407 parser
.add_option("--mapping",dest
="mappingfile",
408 help="File to read last run's hg-to-git SHA1 mapping")
409 parser
.add_option("--marks",dest
="marksfile",
410 help="File to read git-fast-import's marks from")
411 parser
.add_option("--heads",dest
="headsfile",
412 help="File to read last run's git heads from")
413 parser
.add_option("--status",dest
="statusfile",
414 help="File to read status from")
415 parser
.add_option("-r","--repo",dest
="repourl",
416 help="URL of repo to import")
417 parser
.add_option("-s",action
="store_true",dest
="sob",
418 default
=False,help="Enable parsing Signed-off-by lines")
419 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
420 default
=False,help="Enable exporting .hgtags files")
421 parser
.add_option("--flatten",action
="store_true",dest
="flatten",
422 default
=False,help="Create one-level ref names (convert '/' to '_')")
423 parser
.add_option("-A","--authors",dest
="authorfile",
424 help="Read authormap from AUTHORFILE")
425 parser
.add_option("-U",dest
="unknown",
426 help="Email address to use for unknown instead of 'devnull@localhost'")
427 parser
.add_option("-f","--force",action
="store_true",dest
="force",
428 default
=False,help="Ignore validation errors by force")
429 parser
.add_option("-M","--default-branch",dest
="default_branch",
430 help="Set the default branch")
431 parser
.add_option("-o","--origin",dest
="origin_name",
432 help="use <name> as namespace to track upstream")
433 parser
.add_option("--hg-hash",action
="store_true",dest
="notes",
434 default
=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
436 (options
,args
)=parser
.parse_args()
439 if options
.max!=None: m
=options
.max
441 if options
.marksfile
==None: bail(parser
,'--marks')
442 if options
.mappingfile
==None: bail(parser
,'--mapping')
443 if options
.headsfile
==None: bail(parser
,'--heads')
444 if options
.statusfile
==None: bail(parser
,'--status')
445 if options
.repourl
==None: bail(parser
,'--repo')
448 if options
.authorfile
!=None:
449 a
=load_authors(options
.authorfile
)
451 if options
.unknown
!=None:
452 if not set_unknown_addr(options
.unknown
):
453 sys
.stderr
.write("Error: Invalid email address '%s'\n" % options
.unknown
)
456 if options
.default_branch
!=None:
457 set_default_branch(options
.default_branch
)
459 if options
.origin_name
!=None:
460 set_origin_name(options
.origin_name
)
462 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,options
.headsfile
,
463 options
.statusfile
,authors
=a
,sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
,flatten
=options
.flatten
,notes
=options
.notes
))