# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
import os
import re
import sys
from optparse import OptionParser

from mercurial import node

from hg2git import (
    setup_repo,
    fixup_user,
    get_branch,
    get_changeset,
)
from hg2git import (
    load_cache,
    save_cache,
    get_git_sha1,
    set_default_branch,
    set_origin_name,
    set_unknown_addr,
)
if sys.platform == "win32":
    # On Windows, sys.stdout is initially opened in text mode, which means that
    # when a LF (\n) character is written to sys.stdout, it will be converted
    # into CRLF (\r\n). That makes git blow up, so use this platform-specific
    # code to change the mode of sys.stdout to binary.
    import msvcrt  # only available on Windows, hence imported inside the guard
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
# silly regex to catch Signed-off-by lines in log message;
# group 1 captures everything after the "Signed-off-by: " prefix
sob_re = re.compile(r'^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
cfg_checkpoint_count = 0
# write some progress message every this many file contents written
cfg_export_boundary = 1000
# ref manipulation regexs
# runs of characters replaced in ref names: '[', control characters
# (\x00-\x1f), DEL (\x7f), space, '~', '^', ':', backslash, '*' and '?'.
# The leading '[' is escaped so the class does not start with a bare '[',
# which newer Python versions warn about as a possible nested set.
ref_crud_re = re.compile(r'[\[\x00-\x1f\x7f ~^:\\*?]+', re.S)
# two consecutive dots
ref_dotdot_re = re.compile(r'\.\.')
# the sequence '@{'
ref_atbrace_re = re.compile(r'@\{')
# names ending in '.lock' (case-insensitive)
ref_dotlock_re = re.compile(r'.*\.lock$', re.I)
# runs of one or more '/'
ref_separators_re = re.compile(r'/+')
# runs of one or more '_'
ref_collapse_re = re.compile(r'_+')
def gitmode(flags):
    """Return the git file mode string matching a file's flags.

    ``flags`` is any container supporting ``in`` (callers pass the result of
    a manifest's ``flags(filename)``). An ``'l'`` flag takes precedence and
    yields ``'120000'``; otherwise an ``'x'`` flag yields ``'100755'``;
    anything else yields ``'100644'``.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
42 sys
.stdout
.write('\n')
43 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
45 def checkpoint(count
):
47 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
48 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Convert an hg revnum to a git-fast-import rev reference (an SHA1
    or a mark).

    If ``old_marks`` holds a truthy entry for ``rev`` that entry is returned
    unchanged; otherwise the mark string ``':<rev+1>'`` is returned (marks
    are offset by one from hg revision numbers).
    """
    return old_marks.get(rev) or ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """See if two revisions of a file are not equal.

    Both arguments are passed through ``node.hex`` and the resulting hex
    forms are compared; returns True when they differ.
    """
    return node.hex(f1) != node.hex(f2)
def split_dict(dleft, dright, l=None, c=None, r=None, match=file_mismatch):
    """Loop over our repository and find all changed and missing files.

    Compares ``dleft`` (ours) against ``dright`` (the parent's) and sorts
    file names into three lists:

    * ``l`` -- names present in ``dleft`` but missing from ``dright``
    * ``c`` -- names present in both for which ``match`` reports a mismatch
      or whose git mode (``gitmode`` of ``flags(name)``) differs
    * ``r`` -- names present in ``dright`` but missing from ``dleft``

    Lists supplied by the caller are extended in place; an omitted list
    starts out empty on every call. Returns the tuple ``(l, c, r)``.

    Both mappings must provide ``keys()``, ``get()``, item access and
    ``flags(name)``.
    """
    # NOTE(review): the branch conditions/appends and the final return were
    # restored from the surrounding comments and the caller's
    # ``l,c,r=split_dict(...)`` unpacking -- confirm against upstream.

    # Use fresh lists per call: mutable default arguments are shared between
    # calls and would keep accumulating entries from earlier invocations.
    # ``is None`` (not truthiness) so that empty lists passed in by the
    # caller are still the ones that get filled.
    if l is None:
        l = []
    if c is None:
        c = []
    if r is None:
        r = []

    for left in dleft.keys():
        right = dright.get(left, None)
        if right is None:
            # we have the file but our parent hasn't: add to left set
            l.append(left)
        elif (match(dleft[left], right)
              or gitmode(dleft.flags(left)) != gitmode(dright.flags(left))):
            # we have it but checksums mismatch: add to center set
            c.append(left)
    for right in dright.keys():
        left = dleft.get(right, None)
        if left is None:
            # if parent has file but we don't: add to right set
            r.append(right)
        # change is already handled when comparing child against parent
    return l, c, r
80 def get_filechanges(repo
,revision
,parents
,mleft
):
81 """Given some repository and revision, find all changed/deleted files."""
85 mright
=repo
.changectx(p
).manifest()
86 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
92 def get_author(logmessage
,committer
,authors
):
93 """As git distincts between author and committer of a patch, try to
94 extract author by detecting Signed-off-by lines.
96 This walks from the end of the log message towards the top skipping
97 empty lines. Upon the first non-empty line, it walks all Signed-off-by
98 lines upwards to find the first one. For that (if found), it extracts
99 authorship information the usual way (authors table, cleaning, etc.)
101 If no Signed-off-by line is found, this defaults to the committer.
103 This may sound stupid (and it somehow is), but in log messages we
104 accidentially may have lines in the middle starting with
105 "Signed-off-by: foo" and thus matching our detection regex. Prevent
108 loglines
=logmessage
.split('\n')
110 # from tail walk to top skipping empty lines
113 if len(loglines
[i
].strip())==0: continue
116 # walk further upwards to find first sob line, store in 'first'
119 m
=sob_re
.match(loglines
[i
])
123 # if the last non-empty line matches our Signed-Off-by regex: extract username
125 r
=fixup_user(first
.group(1),authors
)
129 def export_file_contents(ctx
,manifest
,files
,hgtags
):
133 # Skip .hgtags files. They only get us in trouble.
134 if not hgtags
and file == ".hgtags":
135 sys
.stderr
.write('Skip %s\n' % (file))
137 d
=ctx
.filectx(file).data()
138 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),file))
139 wr('data %d' % len(d
)) # had some trouble with size()
142 if count
%cfg_export
_boundary
==0:
143 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
144 if max>cfg_export_boundary
:
145 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
147 def sanitize_name(name
,what
="branch"):
148 """Sanitize input roughly according to git-check-ref-format(1)"""
151 if len(name
) >= 1 and name
[0] == '.': return '_'+name
[1:]
155 # be paranoid just in case
159 n
= ref_crud_re
.sub('_', n
)
160 n
= ref_dotdot_re
.sub('_', n
)
161 n
= ref_atbrace_re
.sub('_{', n
)
162 if ref_dotlock_re
.match(n
):
163 n
= n
[:-5] + '_' + n
[-4:]
164 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
165 n
='/'.join(map(dot
,n
.split('/')))
166 if n
[0] == '/': n
='_'+n
[1:]
167 n
= ref_separators_re
.sub('/', n
)
168 n
= ref_collapse_re
.sub('_', n
)
171 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
174 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,sob
,brmap
,hgtags
):
175 def get_branchname(name
):
176 if brmap
.has_key(name
):
178 n
=sanitize_name(name
)
182 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
)
184 branch
=get_branchname(branch
)
186 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
188 if len(parents
)==0 and revision
!= 0:
189 wr('reset refs/heads/%s' % branch
)
191 wr('commit refs/heads/%s' % branch
)
192 wr('mark :%d' % (revision
+1))
194 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
195 wr('committer %s %d %s' % (user
,time
,timezone
))
196 wr('data %d' % (len(desc
)+1)) # wtf?
200 ctx
=repo
.changectx(str(revision
))
202 added
,changed
,removed
,type=[],[],[],''
204 if len(parents
) == 0:
205 # first revision: feed in full manifest
210 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
211 if len(parents
) == 1:
212 # later non-merge revision: feed in changed manifest
213 # if we have exactly one parent, just take the changes from the
214 # manifest without expensively comparing checksums
215 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
216 added
,changed
,removed
=f
[1],f
[0],f
[2]
218 else: # a merge with two parents
219 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
220 # later merge revision: feed in changed manifest
221 # for many files comparing checksums is expensive so only do it for
222 # merges where we really need it due to hg's revlog logic
223 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
224 type='thorough delta'
226 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
227 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
229 map(lambda r
: wr('D %s' % r
),removed
)
230 export_file_contents(ctx
,man
,added
,hgtags
)
231 export_file_contents(ctx
,man
,changed
,hgtags
)
234 return checkpoint(count
)
236 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
):
239 tag
=sanitize_name(tag
,"tag")
240 # ignore latest revision
241 if tag
=='tip': continue
242 # ignore tags to nodes that are missing (ie, 'in the future')
243 if node
.encode('hex_codec') not in mapping_cache
:
244 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
247 rev
=int(mapping_cache
[node
.encode('hex_codec')])
249 ref
=revnum_to_revref(rev
, old_marks
)
251 sys
.stderr
.write('Failed to find reference for creating tag'
252 ' %s at r%d\n' % (tag
,rev
))
254 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
255 wr('reset refs/tags/%s' % tag
)
258 count
=checkpoint(count
)
261 def load_authors(filename
):
263 if not os
.path
.exists(filename
):
268 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
269 for line
in f
.readlines():
272 if line
=='' or line
[0]=='#':
276 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
278 # put key:value in cache, key without ^:
279 cache
[m
.group(1).strip()]=m
.group(2).strip()
282 sys
.stderr
.write('Loaded %d authors\n' % a
)
285 def branchtip(repo
, heads
):
286 '''return the tipmost branch head in heads'''
288 for h
in reversed(heads
):
289 if 'close' not in repo
.changelog
.read(h
)[5]:
294 def verify_heads(ui
,repo
,cache
,force
):
296 for bn
, heads
in repo
.branchmap().iteritems():
297 branches
[bn
] = branchtip(repo
, heads
)
298 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
301 # get list of hg's branches to verify, don't take all git has
307 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
308 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
309 if not force
: return False
311 # verify that branch has exactly one head
313 for h
in repo
.heads():
314 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
315 if t
.get(branch
,False):
316 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
317 repo
.changelog
.rev(h
))
318 if not force
: return False
323 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,authors
={},sob
=False,force
=False,hgtags
=False):
326 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
327 mapping_cache
=load_cache(mappingfile
)
328 heads_cache
=load_cache(headsfile
)
329 state_cache
=load_cache(tipfile
)
331 ui
,repo
=setup_repo(repourl
)
333 if not verify_heads(ui
,repo
,heads_cache
,force
):
337 tip
=repo
.changelog
.count()
338 except AttributeError:
341 min=int(state_cache
.get('tip',0))
343 if _max
<0 or max>tip
:
346 for rev
in range(0,max):
347 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
348 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
353 for rev
in range(min,max):
354 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,sob
,brmap
,hgtags
)
356 state_cache
['tip']=max
357 state_cache
['repo']=repourl
358 save_cache(tipfile
,state_cache
)
359 save_cache(mappingfile
,mapping_cache
)
361 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
)
363 sys
.stderr
.write('Issued %d commands\n' % c
)
367 if __name__
=='__main__':
368 def bail(parser
,opt
):
369 sys
.stderr
.write('Error: No %s option given\n' % opt
)
373 parser
=OptionParser()
375 parser
.add_option("-m","--max",type="int",dest
="max",
376 help="Maximum hg revision to import")
377 parser
.add_option("--mapping",dest
="mappingfile",
378 help="File to read last run's hg-to-git SHA1 mapping")
379 parser
.add_option("--marks",dest
="marksfile",
380 help="File to read git-fast-import's marks from")
381 parser
.add_option("--heads",dest
="headsfile",
382 help="File to read last run's git heads from")
383 parser
.add_option("--status",dest
="statusfile",
384 help="File to read status from")
385 parser
.add_option("-r","--repo",dest
="repourl",
386 help="URL of repo to import")
387 parser
.add_option("-s",action
="store_true",dest
="sob",
388 default
=False,help="Enable parsing Signed-off-by lines")
389 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
390 default
=False,help="Enable exporting .hgtags files")
391 parser
.add_option("-A","--authors",dest
="authorfile",
392 help="Read authormap from AUTHORFILE")
393 parser
.add_option("-U",dest
="unknown",
394 help="Email address to use for unknown instead of 'devnull@localhost'")
395 parser
.add_option("-f","--force",action
="store_true",dest
="force",
396 default
=False,help="Ignore validation errors by force")
397 parser
.add_option("-M","--default-branch",dest
="default_branch",
398 help="Set the default branch")
399 parser
.add_option("-o","--origin",dest
="origin_name",
400 help="use <name> as namespace to track upstream")
402 (options
,args
)=parser
.parse_args()
405 if options
.max!=None: m
=options
.max
407 if options
.marksfile
==None: bail(parser
,'--marks')
408 if options
.mappingfile
==None: bail(parser
,'--mapping')
409 if options
.headsfile
==None: bail(parser
,'--heads')
410 if options
.statusfile
==None: bail(parser
,'--status')
411 if options
.repourl
==None: bail(parser
,'--repo')
414 if options
.authorfile
!=None:
415 a
=load_authors(options
.authorfile
)
417 if options
.unknown
!=None:
418 if not set_unknown_addr(options
.unknown
):
419 sys
.stderr
.write("Error: Invalid email address '%s'\n" % options
.unknown
)
422 if options
.default_branch
!=None:
423 set_default_branch(options
.default_branch
)
425 if options
.origin_name
!=None:
426 set_origin_name(options
.origin_name
)
428 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,options
.headsfile
,
429 options
.statusfile
,authors
=a
,sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
))