3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from mercurial
.scmutil
import revsymbol
8 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
9 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
10 from optparse
import OptionParser
16 if sys
.platform
== "win32":
17 # On Windows, sys.stdout is initially opened in text mode, which means that
18 # when a LF (\n) character is written to sys.stdout, it will be converted
19 # into CRLF (\r\n). That makes git blow up, so use this platform-specific
20 # code to change the mode of sys.stdout to binary.
22 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
# Regex for Signed-off-by lines in a log message: anchored at both ends,
# accepts upper- or lower-case 'O' and 'B', and captures the text after
# "Signed-off-by: " as group 1.
sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
cfg_checkpoint_count=0
# write a progress message each time this many file contents have been written
cfg_export_boundary=1000
32 return 'l' in flags
and '120000' or 'x' in flags
and '100755' or '100644'
40 sys
.stdout
.write('\n')
41 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
43 def checkpoint(count
):
45 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
46 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Map an hg revision number to a git-fast-import revision reference.

    Looks `rev` up in `old_marks`. A truthy entry is returned unchanged;
    otherwise the mark string ':<rev+1>' is returned.
    """
    known = old_marks.get(rev)
    if known:
        return known
    # No usable entry for this revision: refer to it by mark number rev+1.
    return ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """Tell whether two file revisions differ.

    Both arguments are passed through node.hex(); the result is True when
    the two hex values are unequal and False when they are equal.
    """
    hex_first = node.hex(f1)
    hex_second = node.hex(f2)
    return hex_first != hex_second
60 def split_dict(dleft
,dright
,l
=[],c
=[],r
=[],match
=file_mismatch
):
61 """Loop over our repository and find all changed and missing files."""
62 for left
in dleft
.keys():
63 right
=dright
.get(left
,None)
65 # we have the file but our parent hasn't: add to left set
67 elif match(dleft
[left
],right
) or gitmode(dleft
.flags(left
))!=gitmode(dright
.flags(left
)):
68 # we have it but checksums mismatch: add to center set
70 for right
in dright
.keys():
71 left
=dleft
.get(right
,None)
73 # if parent has file but we don't: add to right set
75 # change is already handled when comparing child against parent
78 def get_filechanges(repo
,revision
,parents
,mleft
):
79 """Given some repository and revision, find all changed/deleted files."""
83 mright
=revsymbol(repo
,str(p
)).manifest()
84 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
90 def get_author(logmessage
,committer
,authors
):
91 """As git distinguishes between author and committer of a patch, try to
92 extract author by detecting Signed-off-by lines.
94 This walks from the end of the log message towards the top skipping
95 empty lines. Upon the first non-empty line, it walks all Signed-off-by
96 lines upwards to find the first one. For that (if found), it extracts
97 authorship information the usual way (authors table, cleaning, etc.)
99 If no Signed-off-by line is found, this defaults to the committer.
101 This may sound stupid (and it somehow is), but in log messages we
102 accidentally may have lines in the middle starting with
103 "Signed-off-by: foo" and thus matching our detection regex. Prevent
106 loglines
=logmessage
.split('\n')
108 # from tail walk to top skipping empty lines
111 if len(loglines
[i
].strip())==0: continue
114 # walk further upwards to find first sob line, store in 'first'
117 m
=sob_re
.match(loglines
[i
])
121 # if the last non-empty line matches our Signed-Off-by regex: extract username
123 r
=fixup_user(first
.group(1),authors
)
127 def export_file_contents(ctx
,manifest
,files
,hgtags
,encoding
='',filter_contents
=None,plugins
={}):
131 # Skip .hgtags files. They only get us in trouble.
132 if not hgtags
and file == ".hgtags":
133 sys
.stderr
.write('Skip %s\n' % (file))
136 filename
=file.decode(encoding
).encode('utf8')
139 file_ctx
=ctx
.filectx(file)
143 filter_cmd
=filter_contents
+ [filename
,node
.hex(file_ctx
.filenode()),'1' if file_ctx
.isbinary() else '0']
145 filter_proc
=subprocess
.Popen(filter_cmd
,stdin
=subprocess
.PIPE
,stdout
=subprocess
.PIPE
)
146 d
,_
=filter_proc
.communicate(d
)
148 sys
.stderr
.write('Running filter-contents %s:\n' % filter_cmd
)
150 filter_ret
=filter_proc
.poll()
152 raise subprocess
.CalledProcessError(filter_ret
,filter_cmd
)
154 if plugins
and plugins
['file_data_filters']:
155 file_data
= {'filename':filename
,'file_ctx':file_ctx
,'data':d
}
156 for filter in plugins
['file_data_filters']:
159 filename
=file_data
['filename']
160 file_ctx
=file_data
['file_ctx']
162 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),
163 strip_leading_slash(filename
)))
164 wr('data %d' % len(d
)) # had some trouble with size()
167 if count
%cfg_export
_boundary
==0:
168 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
169 if max>cfg_export_boundary
:
170 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
172 def sanitize_name(name
,what
="branch", mapping
={}):
173 """Sanitize input roughly according to git-check-ref-format(1)"""
175 # NOTE: Do not update this transform to work around
176 # incompatibilities on your platform. If you change it and it starts
177 # modifying names which previously were not touched it will break
178 # preexisting setups which are doing incremental imports.
180 # Fast-export tries to not inflict arbitrary naming policy on the
181 # user, instead it aims to provide mechanisms allowing the user to
182 # apply their own policy. Therefore do not add a transform which can
183 # already be implemented with the -B and -T options to mangle branch
184 # and tag names. If you have a source repository where this is too
185 # much work to do manually, write a tool that does it for you.
189 if not name
: return name
190 if name
[0] == '.': return '_'+name
[1:]
193 n
=mapping
.get(name
,name
)
194 p
=re
.compile('([[ ~^:?\\\\*]|\.\.)')
196 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
197 n
='/'.join(map(dot
,n
.split('/')))
202 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
205 def strip_leading_slash(filename
):
206 if filename
[0] == '/':
210 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,
211 branchesmap
,sob
,brmap
,hgtags
,encoding
='',fn_encoding
='',filter_contents
=None,
213 def get_branchname(name
):
214 if brmap
.has_key(name
):
216 n
=sanitize_name(name
, "branch", branchesmap
)
220 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
222 branch
=get_branchname(branch
)
224 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
225 author
= get_author(desc
,user
,authors
)
227 if plugins
and plugins
['commit_message_filters']:
228 commit_data
= {'branch': branch
, 'parents': parents
, 'author': author
, 'desc': desc
}
229 for filter in plugins
['commit_message_filters']:
231 branch
= commit_data
['branch']
232 parents
= commit_data
['parents']
233 author
= commit_data
['author']
234 desc
= commit_data
['desc']
236 if len(parents
)==0 and revision
!= 0:
237 wr('reset refs/heads/%s' % branch
)
239 wr('commit refs/heads/%s' % branch
)
240 wr('mark :%d' % (revision
+1))
242 wr('author %s %d %s' % (author
,time
,timezone
))
243 wr('committer %s %d %s' % (user
,time
,timezone
))
244 wr('data %d' % (len(desc
)+1)) # wtf?
248 ctx
=revsymbol(repo
,str(revision
))
250 added
,changed
,removed
,type=[],[],[],''
252 if len(parents
) == 0:
253 # first revision: feed in full manifest
258 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
259 if len(parents
) == 1:
260 # later non-merge revision: feed in changed manifest
261 # if we have exactly one parent, just take the changes from the
262 # manifest without expensively comparing checksums
263 f
=repo
.status(parents
[0],revnode
)[:3]
264 added
,changed
,removed
=f
[1],f
[0],f
[2]
266 else: # a merge with two parents
267 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
268 # later merge revision: feed in changed manifest
269 # for many files comparing checksums is expensive so only do it for
270 # merges where we really need it due to hg's revlog logic
271 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
272 type='thorough delta'
274 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
275 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
278 removed
=[r
.decode(fn_encoding
).encode('utf8') for r
in removed
]
280 removed
=[strip_leading_slash(x
) for x
in removed
]
282 map(lambda r
: wr('D %s' % r
),removed
)
283 export_file_contents(ctx
,man
,added
,hgtags
,fn_encoding
,filter_contents
,plugins
)
284 export_file_contents(ctx
,man
,changed
,hgtags
,fn_encoding
,filter_contents
,plugins
)
287 return checkpoint(count
)
289 def export_note(ui
,repo
,revision
,count
,authors
,encoding
,is_first
):
290 (revnode
,_
,user
,(time
,timezone
),_
,_
,_
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
292 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
294 wr('commit refs/notes/hg')
295 wr('committer %s %d %s' % (user
,time
,timezone
))
298 wr('from refs/notes/hg^0')
299 wr('N inline :%d' % (revision
+1))
300 hg_hash
=revsymbol(repo
,str(revision
)).hex()
301 wr('data %d' % (len(hg_hash
)))
304 return checkpoint(count
)
306 wr('data %d' % (len(desc
)+1)) # wtf?
310 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
,tagsmap
):
313 # Remap the tag name
314 tag
=sanitize_name(tag
,"tag",tagsmap
)
315 # ignore latest revision
316 if tag
=='tip': continue
317 # ignore tags to nodes that are missing (ie, 'in the future')
318 if node
.encode('hex_codec') not in mapping_cache
:
319 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
322 rev
=int(mapping_cache
[node
.encode('hex_codec')])
324 ref
=revnum_to_revref(rev
, old_marks
)
326 sys
.stderr
.write('Failed to find reference for creating tag'
327 ' %s at r%d\n' % (tag
,rev
))
329 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
330 wr('reset refs/tags/%s' % tag
)
333 count
=checkpoint(count
)
336 def load_mapping(name
, filename
, mapping_is_raw
):
337 raw_regexp
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
338 string_regexp
='"(((\\.)|(\\")|[^"])*)"'
339 quoted_regexp
=re
.compile('^'+string_regexp
+'[ ]*=[ ]*'+string_regexp
+'$')
341 def parse_raw_line(line
):
342 m
=raw_regexp
.match(line
)
345 return (m
.group(1).strip(), m
.group(2).strip())
347 def parse_quoted_line(line
):
348 m
=quoted_regexp
.match(line
)
351 return (m
.group(1).decode('string_escape'),
352 m
.group(5).decode('string_escape'))
355 if not os
.path
.exists(filename
):
356 sys
.stderr
.write('Could not open mapping file [%s]\n' % (filename
))
361 for line
in f
.readlines():
364 if l
==1 and line
[0]=='#' and line
=='# quoted-escaped-strings':
366 elif line
=='' or line
[0]=='#':
368 m
=parse_raw_line(line
) if mapping_is_raw
else parse_quoted_line(line
)
370 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
372 # put key:value in cache, key without ^:
376 sys
.stderr
.write('Loaded %d %s\n' % (a
, name
))
379 def branchtip(repo
, heads
):
380 '''return the tipmost branch head in heads'''
382 for h
in reversed(heads
):
383 if 'close' not in repo
.changelog
.read(h
)[5]:
388 def verify_heads(ui
,repo
,cache
,force
,branchesmap
):
390 for bn
, heads
in repo
.branchmap().iteritems():
391 branches
[bn
] = branchtip(repo
, heads
)
392 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
395 # get list of hg's branches to verify, don't take all git has
398 sanitized_name
=sanitize_name(b
,"branch",branchesmap
)
399 sha1
=get_git_sha1(sanitized_name
)
400 c
=cache
.get(sanitized_name
)
402 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
403 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
404 if not force
: return False
406 # verify that branch has exactly one head
408 for h
in repo
.heads():
409 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
410 if t
.get(branch
,False):
411 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
412 repo
.changelog
.rev(h
))
413 if not force
: return False
418 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,
419 authors
={},branchesmap
={},tagsmap
={},
420 sob
=False,force
=False,hgtags
=False,notes
=False,encoding
='',fn_encoding
='',filter_contents
=None,
422 def check_cache(filename
, contents
):
423 if len(contents
) == 0:
424 sys
.stderr
.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename
)
428 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
429 mapping_cache
=load_cache(mappingfile
)
430 heads_cache
=load_cache(headsfile
)
431 state_cache
=load_cache(tipfile
)
433 if len(state_cache
) != 0:
434 for (name
, data
) in [(marksfile
, old_marks
),
435 (mappingfile
, mapping_cache
),
436 (headsfile
, state_cache
)]:
437 check_cache(name
, data
)
439 ui
,repo
=setup_repo(repourl
)
441 if not verify_heads(ui
,repo
,heads_cache
,force
,branchesmap
):
445 tip
=repo
.changelog
.count()
446 except AttributeError:
449 min=int(state_cache
.get('tip',0))
451 if _max
<0 or max>tip
:
454 for rev
in range(0,max):
455 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
456 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
461 for rev
in range(min,max):
462 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,branchesmap
,
463 sob
,brmap
,hgtags
,encoding
,fn_encoding
,filter_contents
,
466 for rev
in range(min,max):
467 c
=export_note(ui
,repo
,rev
,c
,authors
, encoding
, rev
== min and min != 0)
469 state_cache
['tip']=max
470 state_cache
['repo']=repourl
471 save_cache(tipfile
,state_cache
)
472 save_cache(mappingfile
,mapping_cache
)
474 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
,tagsmap
)
476 sys
.stderr
.write('Issued %d commands\n' % c
)
480 if __name__
=='__main__':
481 def bail(parser
,opt
):
482 sys
.stderr
.write('Error: No %s option given\n' % opt
)
486 parser
=OptionParser()
488 parser
.add_option("-m","--max",type="int",dest
="max",
489 help="Maximum hg revision to import")
490 parser
.add_option("--mapping",dest
="mappingfile",
491 help="File to read last run's hg-to-git SHA1 mapping")
492 parser
.add_option("--marks",dest
="marksfile",
493 help="File to read git-fast-import's marks from")
494 parser
.add_option("--heads",dest
="headsfile",
495 help="File to read last run's git heads from")
496 parser
.add_option("--status",dest
="statusfile",
497 help="File to read status from")
498 parser
.add_option("-r","--repo",dest
="repourl",
499 help="URL of repo to import")
500 parser
.add_option("-s",action
="store_true",dest
="sob",
501 default
=False,help="Enable parsing Signed-off-by lines")
502 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
503 default
=False,help="Enable exporting .hgtags files")
504 parser
.add_option("-A","--authors",dest
="authorfile",
505 help="Read authormap from AUTHORFILE")
506 parser
.add_option("-B","--branches",dest
="branchesfile",
507 help="Read branch map from BRANCHESFILE")
508 parser
.add_option("-T","--tags",dest
="tagsfile",
509 help="Read tags map from TAGSFILE")
510 parser
.add_option("-f","--force",action
="store_true",dest
="force",
511 default
=False,help="Ignore validation errors by force")
512 parser
.add_option("-M","--default-branch",dest
="default_branch",
513 help="Set the default branch")
514 parser
.add_option("-o","--origin",dest
="origin_name",
515 help="use <name> as namespace to track upstream")
516 parser
.add_option("--hg-hash",action
="store_true",dest
="notes",
517 default
=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
518 parser
.add_option("-e",dest
="encoding",
519 help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
520 parser
.add_option("--fe",dest
="fn_encoding",
521 help="Assume file names from Mercurial are encoded in <filename_encoding>")
522 parser
.add_option("--mappings-are-raw",dest
="raw_mappings", default
=False,
523 help="Assume mappings are raw <key>=<value> lines")
524 parser
.add_option("--filter-contents",dest
="filter_contents",
525 help="Pipe contents of each exported file through FILTER_CONTENTS <file-path> <hg-hash> <is-binary>")
526 parser
.add_option("--plugin-path", type="string", dest
="pluginpath",
527 help="Additional search path for plugins ")
528 parser
.add_option("--plugin", action
="append", type="string", dest
="plugins",
529 help="Add a plugin with the given init string <name=init>")
531 (options
,args
)=parser
.parse_args()
534 if options
.max!=None: m
=options
.max
536 if options
.marksfile
==None: bail(parser
,'--marks')
537 if options
.mappingfile
==None: bail(parser
,'--mapping')
538 if options
.headsfile
==None: bail(parser
,'--heads')
539 if options
.statusfile
==None: bail(parser
,'--status')
540 if options
.repourl
==None: bail(parser
,'--repo')
543 if options
.authorfile
!=None:
544 a
=load_mapping('authors', options
.authorfile
, options
.raw_mappings
)
547 if options
.branchesfile
!=None:
548 b
=load_mapping('branches', options
.branchesfile
, options
.raw_mappings
)
551 if options
.tagsfile
!=None:
552 t
=load_mapping('tags', options
.tagsfile
, True)
554 if options
.default_branch
!=None:
555 set_default_branch(options
.default_branch
)
557 if options
.origin_name
!=None:
558 set_origin_name(options
.origin_name
)
561 if options
.encoding
!=None:
562 encoding
=options
.encoding
565 if options
.fn_encoding
!=None:
566 fn_encoding
=options
.fn_encoding
569 if options
.plugins
!=None:
570 plugins
+=options
.plugins
573 if options
.filter_contents
!=None:
575 filter_contents
=shlex
.split(options
.filter_contents
)
578 plugins_dict
['commit_message_filters']=[]
579 plugins_dict
['file_data_filters']=[]
581 if plugins
and options
.pluginpath
:
582 sys
.stderr
.write('Using additional plugin path: ' + options
.pluginpath
+ '\n')
584 for plugin
in plugins
:
585 split
= plugin
.split('=')
586 name
, opts
= split
[0], '='.join(split
[1:])
587 i
= pluginloader
.get_plugin(name
,options
.pluginpath
)
588 sys
.stderr
.write('Loaded plugin ' + i
['name'] + ' from path: ' + i
['path'] +' with opts: ' + opts
+ '\n')
589 plugin
= pluginloader
.load_plugin(i
).build_filter(opts
)
590 if hasattr(plugin
,'file_data_filter') and callable(plugin
.file_data_filter
):
591 plugins_dict
['file_data_filters'].append(plugin
.file_data_filter
)
592 if hasattr(plugin
, 'commit_message_filter') and callable(plugin
.commit_message_filter
):
593 plugins_dict
['commit_message_filters'].append(plugin
.commit_message_filter
)
595 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,
596 options
.headsfile
, options
.statusfile
,
597 authors
=a
,branchesmap
=b
,tagsmap
=t
,
598 sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
,
599 notes
=options
.notes
,encoding
=encoding
,fn_encoding
=fn_encoding
,filter_contents
=filter_contents
,
600 plugins
=plugins_dict
))