3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import node
7 from mercurial
.scmutil
import revsymbol
8 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
9 from hg2git
import load_cache
,save_cache
,get_git_sha1
,set_default_branch
,set_origin_name
10 from optparse
import OptionParser
16 if sys
.platform
== "win32":
17 # On Windows, sys.stdout is initially opened in text mode, which means that
18 # when a LF (\n) character is written to sys.stdout, it will be converted
19 # into CRLF (\r\n). That makes git blow up, so use this platform-specific
20 # code to change the mode of sys.stdout to binary.
22 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
24 # silly regex to catch Signed-off-by lines in log message
25 sob_re
=re
.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
26 # insert 'checkpoint' command after this many commits or none at all if 0
27 cfg_checkpoint_count
=0
28 # write some progress message every this many file contents written
29 cfg_export_boundary
=1000
32 return 'l' in flags
and '120000' or 'x' in flags
and '100755' or '100644'
40 sys
.stdout
.write('\n')
41 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
43 def checkpoint(count
):
45 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
46 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def revnum_to_revref(rev, old_marks):
    """Return the git-fast-import reference to use for hg revision number rev.

    rev       -- hg revision number (an integer)
    old_marks -- mapping that is queried with ``.get(rev)``

    When old_marks holds a truthy entry for rev, that entry is returned
    unchanged. Otherwise the mark ':<rev+1>' is returned.
    """
    known = old_marks.get(rev)
    if known:
        return known
    # No usable entry recorded: fall back to the mark, which is numbered
    # one above the hg revision number.
    return ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """See if two revisions of a file are not equal.

    f1, f2 -- the two file node ids to compare.

    Returns True when the ids differ, False when they are the same.

    The ids are compared directly. Hex-encoding both sides first yields the
    same answer, because the encoding is one-to-one, but costs two extra
    conversions for every file that is compared.
    """
    return f1 != f2
def split_dict(dleft, dright, l=None, c=None, r=None, match=file_mismatch):
    """Loop over our repository and find all changed and missing files.

    dleft  -- our manifest; used through ``keys()``, ``get()``, item access
              and ``flags(name)``
    dright -- the parent's manifest, used the same way
    l      -- list receiving files we have but the parent has not
    c      -- list receiving files present on both sides whose entries
              differ according to match(), or whose git file mode differs
    r      -- list receiving files the parent has but we have not
    match  -- predicate called as match(our_entry, parent_entry); a true
              result marks the file as changed

    Lists passed in are appended to in place. Each list that is omitted
    starts out as a fresh empty list; the former ``[]`` defaults were shared
    between calls and so leaked results from one call into the next.

    Returns the tuple (l, c, r).
    """
    # NOTE(review): the None checks, the append calls and the return were
    # reconstructed from the comments below and from the caller's
    # 'l,c,r=split_dict(...)' unpacking -- confirm against the pristine file.
    if l is None:
        l = []
    if c is None:
        c = []
    if r is None:
        r = []
    for left in dleft.keys():
        right = dright.get(left, None)
        if right is None:
            # we have the file but our parent hasn't: add to left set
            l.append(left)
        elif match(dleft[left], right) or gitmode(dleft.flags(left)) != gitmode(dright.flags(left)):
            # we have it but checksums mismatch: add to center set
            c.append(left)
    for right in dright.keys():
        left = dleft.get(right, None)
        if left is None:
            # if parent has file but we don't: add to right set
            r.append(right)
        # change is already handled when comparing child against parent
    return l, c, r
78 def get_filechanges(repo
,revision
,parents
,mleft
):
79 """Given some repository and revision, find all changed/deleted files."""
83 mright
=revsymbol(repo
,str(p
)).manifest()
84 l
,c
,r
=split_dict(mleft
,mright
,l
,c
,r
)
90 def get_author(logmessage
,committer
,authors
):
91 """As git distincts between author and committer of a patch, try to
92 extract author by detecting Signed-off-by lines.
94 This walks from the end of the log message towards the top skipping
95 empty lines. Upon the first non-empty line, it walks all Signed-off-by
96 lines upwards to find the first one. For that (if found), it extracts
97 authorship information the usual way (authors table, cleaning, etc.)
99 If no Signed-off-by line is found, this defaults to the committer.
101 This may sound stupid (and it somehow is), but in log messages we
102 accidentally may have lines in the middle starting with
103 "Signed-off-by: foo" and thus matching our detection regex. Prevent
106 loglines
=logmessage
.split('\n')
108 # from tail walk to top skipping empty lines
111 if len(loglines
[i
].strip())==0: continue
114 # walk further upwards to find first sob line, store in 'first'
117 m
=sob_re
.match(loglines
[i
])
121 # if the last non-empty line matches our Signed-Off-by regex: extract username
123 r
=fixup_user(first
.group(1),authors
)
127 def export_file_contents(ctx
,manifest
,files
,hgtags
,encoding
='',plugins
={}):
131 # Skip .hgtags files. They only get us in trouble.
132 if not hgtags
and file == ".hgtags":
133 sys
.stderr
.write('Skip %s\n' % (file))
136 filename
=file.decode(encoding
).encode('utf8')
139 file_ctx
=ctx
.filectx(file)
142 if plugins
and plugins
['file_data_filters']:
143 file_data
= {'filename':filename
,'file_ctx':file_ctx
,'data':d
}
144 for filter in plugins
['file_data_filters']:
147 filename
=file_data
['filename']
148 file_ctx
=file_data
['file_ctx']
150 wr('M %s inline %s' % (gitmode(manifest
.flags(file)),
151 strip_leading_slash(filename
)))
152 wr('data %d' % len(d
)) # had some trouble with size()
155 if count
%cfg_export
_boundary
==0:
156 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
157 if max>cfg_export_boundary
:
158 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
160 def sanitize_name(name
,what
="branch", mapping
={}):
161 """Sanitize input roughly according to git-check-ref-format(1)"""
163 # NOTE: Do not update this transform to work around
164 # incompatibilities on your platform. If you change it and it starts
165 # modifying names which previously were not touched it will break
166 # preexisting setups which are doing incremental imports.
168 # Fast-export tries to not inflict arbitrary naming policy on the
169 # user, instead it aims to provide mechanisms allowing the user to
170 # apply their own policy. Therefore do not add a transform which can
171 # already be implemented with the -B and -T options to mangle branch
172 # and tag names. If you have a source repository where this is too
173 # much work to do manually, write a tool that does it for you.
177 if not name
: return name
178 if name
[0] == '.': return '_'+name
[1:]
181 n
=mapping
.get(name
,name
)
182 p
=re
.compile('([[ ~^:?\\\\*]|\.\.)')
184 if n
[-1] in ('/', '.'): n
=n
[:-1]+'_'
185 n
='/'.join(map(dot
,n
.split('/')))
190 sys
.stderr
.write('Warning: sanitized %s [%s] to [%s]\n' % (what
,name
,n
))
def strip_leading_slash(filename):
    """Return filename without its first character when that is a '/'.

    Only a single leading slash is removed; any other name, including the
    empty string, is returned unchanged. The check uses a slice rather than
    ``filename[0]`` so that an empty name no longer raises IndexError.
    """
    # NOTE(review): the two return statements were reconstructed from the
    # function name and its call sites -- confirm against the pristine file.
    if filename[:1] == '/':
        return filename[1:]
    return filename
198 def export_commit(ui
,repo
,revision
,old_marks
,max,count
,authors
,
199 branchesmap
,sob
,brmap
,hgtags
,encoding
='',fn_encoding
='',
201 def get_branchname(name
):
202 if brmap
.has_key(name
):
204 n
=sanitize_name(name
, "branch", branchesmap
)
208 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
210 branch
=get_branchname(branch
)
212 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
213 author
= get_author(desc
,user
,authors
)
215 if plugins
and plugins
['commit_message_filters']:
216 commit_data
= {'branch': branch
, 'parents': parents
, 'author': author
, 'desc': desc
}
217 for filter in plugins
['commit_message_filters']:
219 branch
= commit_data
['branch']
220 parents
= commit_data
['parents']
221 author
= commit_data
['author']
222 desc
= commit_data
['desc']
224 if len(parents
)==0 and revision
!= 0:
225 wr('reset refs/heads/%s' % branch
)
227 wr('commit refs/heads/%s' % branch
)
228 wr('mark :%d' % (revision
+1))
230 wr('author %s %d %s' % (author
,time
,timezone
))
231 wr('committer %s %d %s' % (user
,time
,timezone
))
232 wr('data %d' % (len(desc
)+1)) # wtf?
236 ctx
=revsymbol(repo
,str(revision
))
238 added
,changed
,removed
,type=[],[],[],''
240 if len(parents
) == 0:
241 # first revision: feed in full manifest
246 wr('from %s' % revnum_to_revref(parents
[0], old_marks
))
247 if len(parents
) == 1:
248 # later non-merge revision: feed in changed manifest
249 # if we have exactly one parent, just take the changes from the
250 # manifest without expensively comparing checksums
251 f
=repo
.status(parents
[0],revnode
)[:3]
252 added
,changed
,removed
=f
[1],f
[0],f
[2]
254 else: # a merge with two parents
255 wr('merge %s' % revnum_to_revref(parents
[1], old_marks
))
256 # later merge revision: feed in changed manifest
257 # for many files comparing checksums is expensive so only do it for
258 # merges where we really need it due to hg's revlog logic
259 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
260 type='thorough delta'
262 sys
.stderr
.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
263 (branch
,type,revision
+1,max,len(added
),len(changed
),len(removed
)))
266 removed
=[r
.decode(fn_encoding
).encode('utf8') for r
in removed
]
268 removed
=[strip_leading_slash(x
) for x
in removed
]
270 map(lambda r
: wr('D %s' % r
),removed
)
271 export_file_contents(ctx
,man
,added
,hgtags
,fn_encoding
,plugins
)
272 export_file_contents(ctx
,man
,changed
,hgtags
,fn_encoding
,plugins
)
275 return checkpoint(count
)
277 def export_note(ui
,repo
,revision
,count
,authors
,encoding
,is_first
):
278 (revnode
,_
,user
,(time
,timezone
),_
,_
,_
,_
)=get_changeset(ui
,repo
,revision
,authors
,encoding
)
280 parents
= [p
for p
in repo
.changelog
.parentrevs(revision
) if p
>= 0]
282 wr('commit refs/notes/hg')
283 wr('committer %s %d %s' % (user
,time
,timezone
))
286 wr('from refs/notes/hg^0')
287 wr('N inline :%d' % (revision
+1))
288 hg_hash
=revsymbol(repo
,str(revision
)).hex()
289 wr('data %d' % (len(hg_hash
)))
292 return checkpoint(count
)
294 wr('data %d' % (len(desc
)+1)) # wtf?
298 def export_tags(ui
,repo
,old_marks
,mapping_cache
,count
,authors
,tagsmap
):
301 # Remap the branch name
302 tag
=sanitize_name(tag
,"tag",tagsmap
)
303 # ignore latest revision
304 if tag
=='tip': continue
305 # ignore tags to nodes that are missing (ie, 'in the future')
306 if node
.encode('hex_codec') not in mapping_cache
:
307 sys
.stderr
.write('Tag %s refers to unseen node %s\n' % (tag
, node
.encode('hex_codec')))
310 rev
=int(mapping_cache
[node
.encode('hex_codec')])
312 ref
=revnum_to_revref(rev
, old_marks
)
314 sys
.stderr
.write('Failed to find reference for creating tag'
315 ' %s at r%d\n' % (tag
,rev
))
317 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
318 wr('reset refs/tags/%s' % tag
)
321 count
=checkpoint(count
)
324 def load_mapping(name
, filename
, mapping_is_raw
):
325 raw_regexp
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
326 string_regexp
='"(((\\.)|(\\")|[^"])*)"'
327 quoted_regexp
=re
.compile('^'+string_regexp
+'[ ]*=[ ]*'+string_regexp
+'$')
329 def parse_raw_line(line
):
330 m
=raw_regexp
.match(line
)
333 return (m
.group(1).strip(), m
.group(2).strip())
335 def parse_quoted_line(line
):
336 m
=quoted_regexp
.match(line
)
339 return (m
.group(1).decode('string_escape'),
340 m
.group(5).decode('string_escape'))
343 if not os
.path
.exists(filename
):
344 sys
.stderr
.write('Could not open mapping file [%s]\n' % (filename
))
349 for line
in f
.readlines():
352 if l
==1 and line
[0]=='#' and line
=='# quoted-escaped-strings':
354 elif line
=='' or line
[0]=='#':
356 m
=parse_raw_line(line
) if mapping_is_raw
else parse_quoted_line(line
)
358 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
360 # put key:value in cache, key without ^:
364 sys
.stderr
.write('Loaded %d %s\n' % (a
, name
))
367 def branchtip(repo
, heads
):
368 '''return the tipmost branch head in heads'''
370 for h
in reversed(heads
):
371 if 'close' not in repo
.changelog
.read(h
)[5]:
376 def verify_heads(ui
,repo
,cache
,force
,branchesmap
):
378 for bn
, heads
in repo
.branchmap().iteritems():
379 branches
[bn
] = branchtip(repo
, heads
)
380 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
383 # get list of hg's branches to verify, don't take all git has
386 sanitized_name
=sanitize_name(b
,"branch",branchesmap
)
387 sha1
=get_git_sha1(sanitized_name
)
388 c
=cache
.get(sanitized_name
)
390 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
391 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
392 if not force
: return False
394 # verify that branch has exactly one head
396 for h
in repo
.heads():
397 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
398 if t
.get(branch
,False):
399 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
400 repo
.changelog
.rev(h
))
401 if not force
: return False
406 def hg2git(repourl
,m
,marksfile
,mappingfile
,headsfile
,tipfile
,
407 authors
={},branchesmap
={},tagsmap
={},
408 sob
=False,force
=False,hgtags
=False,notes
=False,encoding
='',fn_encoding
='',
410 def check_cache(filename
, contents
):
411 if len(contents
) == 0:
412 sys
.stderr
.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename
)
416 old_marks
=load_cache(marksfile
,lambda s
: int(s
)-1)
417 mapping_cache
=load_cache(mappingfile
)
418 heads_cache
=load_cache(headsfile
)
419 state_cache
=load_cache(tipfile
)
421 if len(state_cache
) != 0:
422 for (name
, data
) in [(marksfile
, old_marks
),
423 (mappingfile
, mapping_cache
),
424 (headsfile
, state_cache
)]:
425 check_cache(name
, data
)
427 ui
,repo
=setup_repo(repourl
)
429 if not verify_heads(ui
,repo
,heads_cache
,force
,branchesmap
):
433 tip
=repo
.changelog
.count()
434 except AttributeError:
437 min=int(state_cache
.get('tip',0))
439 if _max
<0 or max>tip
:
442 for rev
in range(0,max):
443 (revnode
,_
,_
,_
,_
,_
,_
,_
)=get_changeset(ui
,repo
,rev
,authors
)
444 mapping_cache
[revnode
.encode('hex_codec')] = str(rev
)
449 for rev
in range(min,max):
450 c
=export_commit(ui
,repo
,rev
,old_marks
,max,c
,authors
,branchesmap
,
451 sob
,brmap
,hgtags
,encoding
,fn_encoding
,
454 for rev
in range(min,max):
455 c
=export_note(ui
,repo
,rev
,c
,authors
, encoding
, rev
== min and min != 0)
457 state_cache
['tip']=max
458 state_cache
['repo']=repourl
459 save_cache(tipfile
,state_cache
)
460 save_cache(mappingfile
,mapping_cache
)
462 c
=export_tags(ui
,repo
,old_marks
,mapping_cache
,c
,authors
,tagsmap
)
464 sys
.stderr
.write('Issued %d commands\n' % c
)
468 if __name__
=='__main__':
469 def bail(parser
,opt
):
470 sys
.stderr
.write('Error: No %s option given\n' % opt
)
474 parser
=OptionParser()
476 parser
.add_option("-m","--max",type="int",dest
="max",
477 help="Maximum hg revision to import")
478 parser
.add_option("--mapping",dest
="mappingfile",
479 help="File to read last run's hg-to-git SHA1 mapping")
480 parser
.add_option("--marks",dest
="marksfile",
481 help="File to read git-fast-import's marks from")
482 parser
.add_option("--heads",dest
="headsfile",
483 help="File to read last run's git heads from")
484 parser
.add_option("--status",dest
="statusfile",
485 help="File to read status from")
486 parser
.add_option("-r","--repo",dest
="repourl",
487 help="URL of repo to import")
488 parser
.add_option("-s",action
="store_true",dest
="sob",
489 default
=False,help="Enable parsing Signed-off-by lines")
490 parser
.add_option("--hgtags",action
="store_true",dest
="hgtags",
491 default
=False,help="Enable exporting .hgtags files")
492 parser
.add_option("-A","--authors",dest
="authorfile",
493 help="Read authormap from AUTHORFILE")
494 parser
.add_option("-B","--branches",dest
="branchesfile",
495 help="Read branch map from BRANCHESFILE")
496 parser
.add_option("-T","--tags",dest
="tagsfile",
497 help="Read tags map from TAGSFILE")
498 parser
.add_option("-f","--force",action
="store_true",dest
="force",
499 default
=False,help="Ignore validation errors by force")
500 parser
.add_option("-M","--default-branch",dest
="default_branch",
501 help="Set the default branch")
502 parser
.add_option("-o","--origin",dest
="origin_name",
503 help="use <name> as namespace to track upstream")
504 parser
.add_option("--hg-hash",action
="store_true",dest
="notes",
505 default
=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
506 parser
.add_option("-e",dest
="encoding",
507 help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
508 parser
.add_option("--fe",dest
="fn_encoding",
509 help="Assume file names from Mercurial are encoded in <filename_encoding>")
510 parser
.add_option("--mappings-are-raw",dest
="raw_mappings", default
=False,
511 help="Assume mappings are raw <key>=<value> lines")
512 parser
.add_option("--filter-contents",dest
="filter_contents",
513 help="Pipe contents of each exported file through FILTER_CONTENTS <file-path> <hg-hash> <is-binary>")
514 parser
.add_option("--plugin-path", type="string", dest
="pluginpath",
515 help="Additional search path for plugins ")
516 parser
.add_option("--plugin", action
="append", type="string", dest
="plugins",
517 help="Add a plugin with the given init string <name=init>")
519 (options
,args
)=parser
.parse_args()
522 if options
.max!=None: m
=options
.max
524 if options
.marksfile
==None: bail(parser
,'--marks')
525 if options
.mappingfile
==None: bail(parser
,'--mapping')
526 if options
.headsfile
==None: bail(parser
,'--heads')
527 if options
.statusfile
==None: bail(parser
,'--status')
528 if options
.repourl
==None: bail(parser
,'--repo')
531 if options
.authorfile
!=None:
532 a
=load_mapping('authors', options
.authorfile
, options
.raw_mappings
)
535 if options
.branchesfile
!=None:
536 b
=load_mapping('branches', options
.branchesfile
, options
.raw_mappings
)
539 if options
.tagsfile
!=None:
540 t
=load_mapping('tags', options
.tagsfile
, True)
542 if options
.default_branch
!=None:
543 set_default_branch(options
.default_branch
)
545 if options
.origin_name
!=None:
546 set_origin_name(options
.origin_name
)
549 if options
.encoding
!=None:
550 encoding
=options
.encoding
553 if options
.fn_encoding
!=None:
554 fn_encoding
=options
.fn_encoding
557 if options
.plugins
!=None:
558 plugins
+=options
.plugins
560 if options
.filter_contents
!=None:
561 plugins
+=['shell_filter_file_contents='+options
.filter_contents
]
564 plugins_dict
['commit_message_filters']=[]
565 plugins_dict
['file_data_filters']=[]
567 if plugins
and options
.pluginpath
:
568 sys
.stderr
.write('Using additional plugin path: ' + options
.pluginpath
+ '\n')
570 for plugin
in plugins
:
571 split
= plugin
.split('=')
572 name
, opts
= split
[0], '='.join(split
[1:])
573 i
= pluginloader
.get_plugin(name
,options
.pluginpath
)
574 sys
.stderr
.write('Loaded plugin ' + i
['name'] + ' from path: ' + i
['path'] +' with opts: ' + opts
+ '\n')
575 plugin
= pluginloader
.load_plugin(i
).build_filter(opts
)
576 if hasattr(plugin
,'file_data_filter') and callable(plugin
.file_data_filter
):
577 plugins_dict
['file_data_filters'].append(plugin
.file_data_filter
)
578 if hasattr(plugin
, 'commit_message_filter') and callable(plugin
.commit_message_filter
):
579 plugins_dict
['commit_message_filters'].append(plugin
.commit_message_filter
)
581 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.mappingfile
,
582 options
.headsfile
, options
.statusfile
,
583 authors
=a
,branchesmap
=b
,tagsmap
=t
,
584 sob
=options
.sob
,force
=options
.force
,hgtags
=options
.hgtags
,
585 notes
=options
.notes
,encoding
=encoding
,fn_encoding
=fn_encoding
,
586 plugins
=plugins_dict
))