3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
8 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
9 from optparse
import OptionParser
14 if sys
.platform
== "win32":
15 # On Windows, sys.stdout is initially opened in text mode, which means that
16 # when a LF (\n) character is written to sys.stdout, it will be converted
17 # into CRLF (\r\n). That makes git blow up, so use this platform-specific
18 # code to change the mode of sys.stdout to binary.
20 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
# Regex used to recognise Signed-off-by lines in a log message; group 1
# captures everything after the "Signed-off-by: " prefix.
sob_re = re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# Insert a 'checkpoint' command after this many commits, or none at all if 0.
cfg_checkpoint_count = 0
# Write a progress message every this many file contents written.
cfg_export_boundary = 1000
30 return 'l' in flags
and '120000' or 'x' in flags
and '100755' or '100644'
38 sys
.stdout
.write('\n')
39 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
41 def checkpoint(count
):
43 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
44 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Convert an hg revnum to a git-fast-import rev reference.

    Returns the entry stored for ``rev`` in ``old_marks`` when one exists
    and is truthy; otherwise returns a mark reference of the form
    ``':N'`` where ``N`` is ``rev + 1``.
    """
    # Marks are offset by one from the hg revision number (rev 0 -> ':1').
    return old_marks.get(rev) or ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """See if two revisions of a file are not equal.

    Both arguments are converted with ``node.hex`` and the resulting
    values are compared; returns True when they differ.
    """
    return node.hex(f1) != node.hex(f2)
58 def split_dict(dleft
,dright
,l
=[],c
=[],r
=[],match
=file_mismatch
):
59 """Loop over our repository and find all changed and missing files."""
60 for left
in dleft
.keys():
61 right
=dright
.get(left
,None)
63 # we have the file but our parent hasn't: add to left set
65 elif match(dleft
[left
],right
) or gitmode(dleft
.flags(left
))!=gitmode(dright
.flags(left
)):
66 # we have it but checksums mismatch: add to center set
68 for right
in dright
.keys():
69 left
=dleft
.get(right
,None)
71 # if parent has file but we don't: add to right set
73 # change is already handled when comparing child against parent
76 def get_filechanges(repo
,revision
,parents
,mleft
):
77 """Given some repository and revision, find all changed/deleted files."""
81 mright
=repo
.changectx(p
).manifest()
82 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
88 def get_author(logmessage
,committer
,authors
):
89 """As git distinguishes between author and committer of a patch, try to
90 extract author by detecting Signed-off-by lines.
92 This walks from the end of the log message towards the top skipping
93 empty lines. Upon the first non-empty line, it walks all Signed-off-by
94 lines upwards to find the first one. For that (if found), it extracts
95 authorship information the usual way (authors table, cleaning, etc.)
97 If no Signed-off-by line is found, this defaults to the committer.
99 This may sound stupid (and it somehow is), but in log messages we
100 accidentally may have lines in the middle starting with
101 "Signed-off-by: foo" and thus matching our detection regex. Prevent
104 loglines
=logmessage
.split('\n')
106 # from tail walk to top skipping empty lines
109 if len(loglines
[i
].strip())==0: continue
112 # walk further upwards to find first sob line, store in 'first'
115 m
=sob_re
.match(loglines
[i
])
119 # if the last non-empty line matches our Signed-Off-by regex: extract username
121 r
=fixup_user(first
.group(1),authors
)
125 def export_file_contents(ctx
,manifest
,files
,hgtags
,encoding
=''):
129 # Skip .hgtags files. They only get us in trouble.
130 if not hgtags
and file == ".hgtags":
131 sys
.stderr
.write('Skip %s\n' % (file))
133 d
=ctx
.filectx(file).data()
135 filename
=file.decode(encoding
).encode('utf8')
138 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),
139 strip_leading_slash(filename
)))
140 wr('data %d' % len(d
)) # had some trouble with size()
143 if count
%cfg_export
_boundary
==0:
144 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
145 if max>cfg_export_boundary
:
146 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
148 def sanitize_name(name
,what
="branch", mapping
={}):
149 """Sanitize input roughly according to git-check-ref-format(1)"""
151 # NOTE: Do not update this transform to work around
152 # incompatibilities on your platform. If you change it and it starts
153 # modifying names which previously were not touched it will break
154 # preexisting setups which are doing incremental imports.
156 # Fast-export tries to not inflict arbitrary naming policy on the
157 # user, instead it aims to provide mechanisms allowing the user to
158 # apply their own policy. Therefore do not add a transform which can
159 # already be implemented with the -B and -T options to mangle branch
160 # and tag names. If you have a source repository where this is too
161 # much work to do manually, write a tool that does it for you.
165 if not name
: return name
166 if name
[0] == '.': return '_'+name
[1:]
169 n
=mapping
.get(name
,name
)
170 p
=re
.compile('([[ ~^:?\\\\*]|\.\.)')
172 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
173 n
='/'.join(map(dot
,n
.split('/')))
178 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
181 def strip_leading_slash(filename
):
182 if filename
[0] == '/':
186 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,
187 branchesmap
,sob
,brmap
,hgtags
,encoding
='',fn_encoding
=''):
188 def get_branchname(name
):
189 if brmap
.has_key(name
):
191 n
=sanitize_name(name
, "branch", branchesmap
)
195 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
197 branch
=get_branchname(branch
)
199 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
201 if len(parents
)==0 and revision
!= 0:
202 wr('reset refs/heads/%s' % branch
)
204 wr('commit refs/heads/%s' % branch
)
205 wr('mark :%d' % (revision
+1))
207 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
208 wr('committer %s %d %s' % (user
,time
,timezone
))
209 wr('data %d' % (len(desc
)+1)) # wtf?
213 ctx
=repo
.changectx(str(revision
))
215 added
,changed
,removed
,type=[],[],[],''
217 if len(parents
) == 0:
218 # first revision: feed in full manifest
223 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
224 if len(parents
) == 1:
225 # later non-merge revision: feed in changed manifest
226 # if we have exactly one parent, just take the changes from the
227 # manifest without expensively comparing checksums
228 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
229 added
,changed
,removed
=f
[1],f
[0],f
[2]
231 else: # a merge with two parents
232 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
233 # later merge revision: feed in changed manifest
234 # for many files comparing checksums is expensive so only do it for
235 # merges where we really need it due to hg's revlog logic
236 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
237 type='thorough delta'
239 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
240 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
243 removed
=[r
.decode(fn_encoding
).encode('utf8') for r
in removed
]
245 removed
=[strip_leading_slash(x
) for x
in removed
]
247 map(lambda r
: wr('D %s' % r
),removed
)
248 export_file_contents(ctx
,man
,added
,hgtags
,fn_encoding
)
249 export_file_contents(ctx
,man
,changed
,hgtags
,fn_encoding
)
252 return checkpoint(count
)
254 def export_note(ui
,repo
,revision
,count
,authors
,encoding
,is_first
):
255 (revnode
,_
,user
,(time
,timezone
),_
,_
,_
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
257 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
259 wr('commit refs/notes/hg')
260 wr('committer %s %d %s' % (user
,time
,timezone
))
263 wr('from refs/notes/hg^0')
264 wr('N inline :%d' % (revision
+1))
265 hg_hash
=repo
.changectx(str(revision
)).hex()
266 wr('data %d' % (len(hg_hash
)))
269 return checkpoint(count
)
271 wr('data %d' % (len(desc
)+1)) # wtf?
275 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
,tagsmap
):
278 # Remap the tag name
279 tag
=sanitize_name(tag
,"tag",tagsmap
)
280 # ignore latest revision
281 if tag
=='tip': continue
282 # ignore tags to nodes that are missing (ie, 'in the future')
283 if node
.encode('hex_codec') not in mapping_cache
:
284 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
287 rev
=int(mapping_cache
[node
.encode('hex_codec')])
289 ref
=revnum_to_revref(rev
, old_marks
)
291 sys
.stderr
.write('Failed to find reference for creating tag'
292 ' %s at r%d\n' % (tag
,rev
))
294 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
295 wr('reset refs/tags/%s' % tag
)
298 count
=checkpoint(count
)
301 def load_mapping(name
, filename
, mapping_is_raw
):
302 raw_regexp
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
303 string_regexp
='"(((\\.)|(\\")|[^"])*)"'
304 quoted_regexp
=re
.compile('^'+string_regexp
+'[ ]*=[ ]*'+string_regexp
+'$')
306 def parse_raw_line(line
):
307 m
=raw_regexp
.match(line
)
310 return (m
.group(1).strip(), m
.group(2).strip())
312 def parse_quoted_line(line
):
313 m
=quoted_regexp
.match(line
)
316 return (m
.group(1).decode('string_escape'),
317 m
.group(5).decode('string_escape'))
320 if not os
.path
.exists(filename
):
321 sys
.stderr
.write('Could not open mapping file [%s]\n' % (filename
))
326 for line
in f
.readlines():
329 if l
==1 and line
[0]=='#' and line
=='# quoted-escaped-strings':
331 elif line
=='' or line
[0]=='#':
333 m
=parse_raw_line(line
) if mapping_is_raw
else parse_quoted_line(line
)
335 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
337 # put key:value in cache, key without ^:
341 sys
.stderr
.write('Loaded %d %s\n' % (a
, name
))
344 def branchtip(repo
, heads
):
345 '''return the tipmost branch head in heads'''
347 for h
in reversed(heads
):
348 if 'close' not in repo
.changelog
.read(h
)[5]:
353 def verify_heads(ui
,repo
,cache
,force
,branchesmap
):
355 for bn
, heads
in repo
.branchmap().iteritems():
356 branches
[bn
] = branchtip(repo
, heads
)
357 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
360 # get list of hg's branches to verify, don't take all git has
363 sanitized_name
=sanitize_name(b
,"branch",branchesmap
)
364 sha1
=get_git_sha1(sanitized_name
)
365 c
=cache
.get(sanitized_name
)
367 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
368 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
369 if not force
: return False
371 # verify that branch has exactly one head
373 for h
in repo
.heads():
374 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
375 if t
.get(branch
,False):
376 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
377 repo
.changelog
.rev(h
))
378 if not force
: return False
383 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,
384 authors
={},branchesmap
={},tagsmap
={},
385 sob
=False,force
=False,hgtags
=False,notes
=False,encoding
='',fn_encoding
=''):
386 def check_cache(filename
, contents
):
387 if len(contents
) == 0:
388 sys
.stderr
.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename
)
392 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
393 mapping_cache
=load_cache(mappingfile
)
394 heads_cache
=load_cache(headsfile
)
395 state_cache
=load_cache(tipfile
)
397 if len(state_cache
) != 0:
398 for (name
, data
) in [(marksfile
, old_marks
),
399 (mappingfile
, mapping_cache
),
400 (headsfile
, state_cache
)]:
401 check_cache(name
, data
)
403 ui
,repo
=setup_repo(repourl
)
405 if not verify_heads(ui
,repo
,heads_cache
,force
,branchesmap
):
409 tip
=repo
.changelog
.count()
410 except AttributeError:
413 min=int(state_cache
.get('tip',0))
415 if _max
<0 or max>tip
:
418 for rev
in range(0,max):
419 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
420 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
425 for rev
in range(min,max):
426 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,branchesmap
,
427 sob
,brmap
,hgtags
,encoding
,fn_encoding
)
429 for rev
in range(min,max):
430 c
=export_note(ui
,repo
,rev
,c
,authors
, encoding
, rev
== min and min != 0)
432 state_cache
['tip']=max
433 state_cache
['repo']=repourl
434 save_cache(tipfile
,state_cache
)
435 save_cache(mappingfile
,mapping_cache
)
437 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
,tagsmap
)
439 sys
.stderr
.write('Issued %d commands\n' % c
)
443 if __name__
=='__main__':
444 def bail(parser
,opt
):
445 sys
.stderr
.write('Error: No %s option given\n' % opt
)
449 parser
=OptionParser()
451 parser
.add_option("-m","--max",type="int",dest
="max",
452 help="Maximum hg revision to import")
453 parser
.add_option("--mapping",dest
="mappingfile",
454 help="File to read last run's hg-to-git SHA1 mapping")
455 parser
.add_option("--marks",dest
="marksfile",
456 help="File to read git-fast-import's marks from")
457 parser
.add_option("--heads",dest
="headsfile",
458 help="File to read last run's git heads from")
459 parser
.add_option("--status",dest
="statusfile",
460 help="File to read status from")
461 parser
.add_option("-r","--repo",dest
="repourl",
462 help="URL of repo to import")
463 parser
.add_option("-s",action
="store_true",dest
="sob",
464 default
=False,help="Enable parsing Signed-off-by lines")
465 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
466 default
=False,help="Enable exporting .hgtags files")
467 parser
.add_option("-A","--authors",dest
="authorfile",
468 help="Read authormap from AUTHORFILE")
469 parser
.add_option("-B","--branches",dest
="branchesfile",
470 help="Read branch map from BRANCHESFILE")
471 parser
.add_option("-T","--tags",dest
="tagsfile",
472 help="Read tags map from TAGSFILE")
473 parser
.add_option("-f","--force",action
="store_true",dest
="force",
474 default
=False,help="Ignore validation errors by force")
475 parser
.add_option("-M","--default-branch",dest
="default_branch",
476 help="Set the default branch")
477 parser
.add_option("-o","--origin",dest
="origin_name",
478 help="use <name> as namespace to track upstream")
479 parser
.add_option("--hg-hash",action
="store_true",dest
="notes",
480 default
=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
481 parser
.add_option("-e",dest
="encoding",
482 help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
483 parser
.add_option("--fe",dest
="fn_encoding",
484 help="Assume file names from Mercurial are encoded in <filename_encoding>")
485 parser
.add_option("--mappings-are-raw",dest
="raw_mappings", default
=False,
486 help="Assume mappings are raw <key>=<value> lines")
488 (options
,args
)=parser
.parse_args()
491 if options
.max!=None: m
=options
.max
493 if options
.marksfile
==None: bail(parser
,'--marks')
494 if options
.mappingfile
==None: bail(parser
,'--mapping')
495 if options
.headsfile
==None: bail(parser
,'--heads')
496 if options
.statusfile
==None: bail(parser
,'--status')
497 if options
.repourl
==None: bail(parser
,'--repo')
500 if options
.authorfile
!=None:
501 a
=load_mapping('authors', options
.authorfile
, options
.raw_mappings
)
504 if options
.branchesfile
!=None:
505 b
=load_mapping('branches', options
.branchesfile
, options
.raw_mappings
)
508 if options
.tagsfile
!=None:
509 t
=load_mapping('tags', options
.tagsfile
, True)
511 if options
.default_branch
!=None:
512 set_default_branch(options
.default_branch
)
514 if options
.origin_name
!=None:
515 set_origin_name(options
.origin_name
)
518 if options
.encoding
!=None:
519 encoding
=options
.encoding
522 if options
.fn_encoding
!=None:
523 fn_encoding
=options
.fn_encoding
525 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,
526 options
.headsfile
, options
.statusfile
,
527 authors
=a
,branchesmap
=b
,tagsmap
=t
,
528 sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
,
529 notes
=options
.notes
,encoding
=encoding
,fn_encoding
=fn_encoding
))