Create codeql-analysis.yml
[fast-export.git] / hg-fast-export.py
blob7c22d81e3a1b58e37476b59c814f12df3cf48197
1 #!/usr/bin/env python2
3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial import node
7 from mercurial.scmutil import revsymbol
8 from hg2git import setup_repo,fixup_user,get_branch,get_changeset
9 from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
10 from optparse import OptionParser
11 import re
12 import sys
13 import os
14 from binascii import hexlify
15 import pluginloader
# Remember whether we run under Python 2; there, rebind `str` to `unicode`.
PY2 = sys.version_info[0] == 2
if PY2:
    str = unicode

if PY2 and sys.platform == "win32":
    # On Windows, sys.stdout is initially opened in text mode, so every LF
    # (\n) written to it is converted into CRLF (\r\n). That makes git blow
    # up, hence this platform-specific switch of sys.stdout to binary mode.
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)

# Simple regex catching Signed-off-by lines in a log message.
sob_re = re.compile(b'^Signed-[Oo]ff-[Bb]y: (.+)$')
# Insert a 'checkpoint' command after this many commits (0 disables it).
cfg_checkpoint_count = 0
# Write a progress message every this many exported file contents.
cfg_export_boundary = 1000

# Per-subrepository caches, filled lazily by refresh_hg_submodule().
subrepo_cache = {}
# Subrepo name -> git repository mapping; set by the command-line entry point.
submodule_mappings = None

# True if fast export should automatically try to sanitize
# author/branch/tag names.
auto_sanitize = None

# Byte-oriented output streams: the plain streams on Python 2, their
# underlying binary buffers otherwise.
if PY2:
    stdout_buffer = sys.stdout
    stderr_buffer = sys.stderr
else:
    stdout_buffer = sys.stdout.buffer
    stderr_buffer = sys.stderr.buffer
def gitmode(flags):
    """Translate Mercurial file flags into a git file mode.

    b'l' in *flags* selects the symlink mode, otherwise b'x' selects the
    executable mode; anything else is a regular file.
    """
    if b'l' in flags:
        return b'120000'
    if b'x' in flags:
        return b'100755'
    return b'100644'
def wr_no_nl(msg=b''):
    """Write the bytes *msg* to stdout_buffer without appending a newline.

    Nothing is written for an empty message. *msg* must be bytes.
    """
    assert isinstance(msg, bytes)
    if not msg:
        return
    stdout_buffer.write(msg)
def wr(msg=b''):
    """Write the bytes *msg* followed by a single newline to stdout_buffer."""
    wr_no_nl(msg)
    out = stdout_buffer
    out.write(b'\n')
def checkpoint(count):
    """Bump the command counter and emit a checkpoint when one is due.

    A 'checkpoint' command is written whenever cfg_checkpoint_count is
    positive and the incremented counter is a multiple of it. Returns the
    incremented counter.
    """
    bumped = count + 1
    due = cfg_checkpoint_count > 0 and bumped % cfg_checkpoint_count == 0
    if due:
        stderr_buffer.write(b"Checkpoint after %d commits\n" % bumped)
        wr(b'checkpoint')
        wr()
    return bumped
def revnum_to_revref(rev, old_marks):
    """Return the git-fast-import reference for hg revision number *rev*.

    A truthy entry in *old_marks* is returned as is; otherwise the mark
    ``:<rev+1>`` is used.
    """
    known = old_marks.get(rev)
    if known:
        return known
    return b':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """Tell whether two file revisions differ, comparing their hex forms."""
    left_hex = node.hex(f1)
    right_hex = node.hex(f2)
    return left_hex != right_hex
def split_dict(dleft, dright, l=None, c=None, r=None, match=file_mismatch):
    """Compare two manifests and collect added, changed and removed files.

    dleft  -- manifest of the revision being exported
    dright -- manifest of one of its parents
    l      -- list receiving files only present in dleft (added)
    c      -- list receiving files present in both whose content (per
              *match*) or git mode differs (changed)
    r      -- list receiving files only present in dright (removed)
    match  -- predicate called with the two file nodes; true means "differs"

    The given lists are extended in place, so results can be accumulated
    over several parents. Omitted lists start out empty on every call
    (they used to be shared mutable defaults that leaked between calls).
    Returns the tuple (l, c, r).
    """
    l = [] if l is None else l
    c = [] if c is None else c
    r = [] if r is None else r

    for left in dleft.keys():
        right = dright.get(left, None)
        if right is None:
            # we have the file but our parent hasn't: add to left set
            l.append(left)
        elif (match(dleft[left], right)
              or gitmode(dleft.flags(left)) != gitmode(dright.flags(left))):
            # we have it but checksums or modes mismatch: add to center set
            c.append(left)

    for right in dright.keys():
        if dleft.get(right, None) is None:
            # parent has the file but we don't: add to right set
            r.append(right)
        # a change is already handled when comparing child against parent

    return l, c, r
def get_filechanges(repo, revision, parents, mleft):
    """Collect added/changed/removed files of a revision against its parents.

    *mleft* is the manifest of the revision itself; every non-negative
    entry of *parents* is looked up in *repo* and compared with it through
    split_dict(). Returns three sorted lists (added, changed, removed).
    """
    added, changed, removed = [], [], []
    for parent in parents:
        if parent >= 0:
            parent_manifest = revsymbol(repo, b"%d" % parent).manifest()
            added, changed, removed = split_dict(
                mleft, parent_manifest, added, changed, removed)
    for bucket in (added, changed, removed):
        bucket.sort()
    return added, changed, removed
def get_author(logmessage, committer, authors):
    """Derive the git author of a commit from trailing Signed-off-by lines.

    Git distinguishes between the author and the committer of a patch, so
    the author is taken from the log message when possible.

    Trailing blank lines are skipped. Starting at the last non-blank line,
    consecutive Signed-off-by lines are followed upwards and the topmost
    one of that run is used; its name goes through fixup_user() with the
    *authors* table.

    If the last non-blank line is not a Signed-off-by line (or the message
    is blank), *committer* is returned.

    Only the trailing run is considered on purpose: a line in the middle
    of a message may accidentally start with "Signed-off-by: foo" and
    would otherwise match the detection regex.
    """
    loglines = logmessage.split(b'\n')

    # Index of the last non-blank line, or -1 if every line is blank.
    last = len(loglines) - 1
    while last >= 0 and not loglines[last].strip():
        last -= 1

    # Walk upwards through the trailing run of sob lines; `first` ends up
    # being the topmost match of that run.
    first = None
    for line in reversed(loglines[:last + 1]):
        found = sob_re.match(line)
        if found is None:
            break
        first = found

    if first is None:
        return committer
    return fixup_user(first.group(1), authors)
def remove_gitmodules(ctx):
    """Emit delete commands for every submodule of ctx's parents.

    Removing the submodules coming from all parents is safe, because the
    submodules of the current commit are re-added afterwards. A possible
    optimization would be to only remove those of the first parent.
    """
    stale = [
        submodule
        for parent_ctx in ctx.parents()
        for submodule in parent_ctx.substate.keys()
    ]
    for submodule in stale:
        wr(b'D %s' % submodule)
    wr(b'D .gitmodules')
def refresh_git_submodule(name, subrepo_info):
    """Emit the gitlink for a git subrepository and return its config stanza.

    subrepo_info[1] is used as the submodule revision and subrepo_info[0]
    as its url. Returns the matching .gitmodules section as bytes.
    """
    revision = subrepo_info[1]
    wr(b'M 160000 %s %s' % (revision, name))
    stderr_buffer.write(
        b"Adding/updating submodule %s, revision %s\n" % (name, revision)
    )
    stanza = b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n'
    return stanza % (name, name, subrepo_info[0])
def refresh_hg_submodule(name, subrepo_info):
    """Emit the gitlink for a converted hg subrepository.

    The hg revision in subrepo_info[1] is translated to a git revision
    through the hg2git-mapping and hg2git-marks files found under the
    `.git` directory of submodule_mappings[name]. Returns the .gitmodules
    section for the submodule, or b'' (after a warning on stderr) when the
    hg revision is not in the mapping.
    """
    git_repo_location = submodule_mappings[name] + b"/.git"

    # Populate the cache that maps mercurial revisions to git revisions.
    if name not in subrepo_cache:
        subrepo_cache[name] = (
            load_cache(git_repo_location + b"/hg2git-mapping"),
            load_cache(git_repo_location + b"/hg2git-marks",
                       lambda s: int(s) - 1),
        )

    mapping_cache, marks_cache = subrepo_cache[name]
    subrepo_hash = subrepo_info[1]

    if subrepo_hash not in mapping_cache:
        stderr_buffer.write(
            b"Warning: Could not find hg revision %s for %s in git %s\n"
            % (subrepo_hash, name, git_repo_location,)
        )
        return b''

    revnum = mapping_cache[subrepo_hash]
    git_sha = marks_cache[int(revnum)]
    wr(b'M 160000 %s %s' % (git_sha, name))
    stderr_buffer.write(
        b"Adding/updating submodule %s, revision %s->%s\n"
        % (name, subrepo_hash, git_sha)
    )
    return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (
        name, name, submodule_mappings[name])
def refresh_gitmodules(ctx):
    """Rewrite the submodule entries and .gitmodules from ctx.substate."""
    remove_gitmodules(ctx)

    # Re-create every submodule and gather its .gitmodules section.
    sections = []
    for name, subrepo_info in ctx.substate.items():
        if subrepo_info[2] == b'git':
            sections.append(refresh_git_submodule(name, subrepo_info))
        elif submodule_mappings and name in submodule_mappings:
            sections.append(refresh_hg_submodule(name, subrepo_info))
    gitmodules = b"".join(sections)

    if gitmodules:
        wr(b'M 100644 inline .gitmodules')
        # +1 accounts for the newline wr() appends after the payload.
        wr(b'data %d' % (len(gitmodules) + 1))
        wr(gitmodules)
def export_file_contents(ctx, manifest, files, hgtags, encoding='', plugins={}):
    """Write an inline 'M' (modify) command for each path in *files*.

    ctx      -- changeset context the file contents are read from
    manifest -- manifest providing the file flags (and thus the git mode)
    files    -- paths to export
    hgtags   -- when false, .hgtags files are skipped
    encoding -- if set, file names are re-encoded from it to UTF-8
    plugins  -- dict; the callables under 'file_data_filters' may rewrite
                the 'filename', 'file_ctx' and 'data' of each file
    """
    total = len(files)
    exported = 0
    submodules_done = False

    for path in files:
        # Submodules are refreshed once, on the first .hgsub/.hgsubstate seen.
        if not submodules_done and path in (b'.hgsub', b'.hgsubstate'):
            submodules_done = True
            refresh_gitmodules(ctx)

        # Skip .hgtags files. They only get us in trouble.
        if path == b".hgtags" and not hgtags:
            stderr_buffer.write(b'Skip %s\n' % path)
            continue

        filename = path.decode(encoding).encode('utf8') if encoding else path

        # Even on Windows, the path separator is / here.
        if b'.git' in filename.split(b'/'):
            stderr_buffer.write(
                b'Ignoring file %s which cannot be tracked by git\n' % filename
            )
            continue

        file_ctx = ctx.filectx(path)
        d = file_ctx.data()

        if plugins and plugins['file_data_filters']:
            file_data = {'filename': filename, 'file_ctx': file_ctx, 'data': d}
            for data_filter in plugins['file_data_filters']:
                data_filter(file_data)
            d = file_data['data']
            filename = file_data['filename']
            file_ctx = file_data['file_ctx']

        mode = gitmode(manifest.flags(path))
        wr(b'M %s inline %s' % (mode, strip_leading_slash(filename)))
        wr(b'data %d' % len(d))  # had some trouble with size()
        wr(d)

        exported += 1
        if exported % cfg_export_boundary == 0:
            stderr_buffer.write(b'Exported %d/%d files\n' % (exported, total))

    if total > cfg_export_boundary:
        stderr_buffer.write(b'Exported %d/%d files\n' % (exported, total))
def sanitize_name(name, what="branch", mapping=None):
    """Sanitize input roughly according to git-check-ref-format(1).

    name    -- branch or tag name (bytes)
    what    -- kind of name, only used in the warning message
    mapping -- optional explicit name mapping, applied before sanitizing

    Returns the mapped name untouched when auto_sanitize is off, otherwise
    the mapped and sanitized name. A warning goes to stderr whenever the
    result differs from *name*.
    """

    # NOTE: Do not update this transform to work around
    # incompatibilities on your platform. If you change it and it starts
    # modifying names which previously were not touched it will break
    # preexisting setups which are doing incremental imports.
    #
    # Fast-export tries to not inflict arbitrary naming policy on the
    # user, instead it aims to provide mechanisms allowing the user to
    # apply their own policy. Therefore do not add a transform which can
    # already be implemented with the -B and -T options to mangle branch
    # and tag names. If you have a source repository where this is too
    # much work to do manually, write a tool that does it for you.

    def dot(name):
        # Replace a leading dot of a path component with an underscore.
        if not name: return name
        if name[0:1] == b'.': return b'_' + name[1:]
        return name

    if mapping is None:
        mapping = {}

    n = mapping.get(name, name)
    if not auto_sanitize:
        return n

    # Same pattern as before, spelled as a raw literal with the '[' inside
    # the character class escaped. This matches exactly the same input but
    # avoids the invalid-escape and "possible nested set" warnings.
    p = re.compile(br'([\[ ~^:?\\*]|\.\.)')
    n = p.sub(b'_', n)
    if n[-1:] in (b'/', b'.'): n = n[:-1] + b'_'
    n = b'/'.join([dot(s) for s in n.split(b'/')])
    # Collapse runs of underscores into a single one.
    p = re.compile(b'_+')
    n = p.sub(b'_', n)

    if n != name:
        stderr_buffer.write(
            b'Warning: sanitized %s [%s] to [%s]\n' % (what.encode(), name, n)
        )
    return n
def strip_leading_slash(filename):
    """Return *filename* (bytes) without a single leading b'/', if any."""
    has_slash = filename[0:1] == b'/'
    return filename[1:] if has_slash else filename
def export_commit(ui, repo, revision, old_marks, max, count, authors,
                  branchesmap, sob, brmap, hgtags, encoding='', fn_encoding='',
                  plugins={}):
    """Write the fast-import 'commit' command for hg revision *revision*.

    Hidden changesets are skipped and *count* is returned unchanged.
    Otherwise the commit header, the log message and all file changes are
    written and the result of checkpoint(count) is returned.

    old_marks   -- revnum -> git ref of already imported revisions
    max         -- upper revision bound, only used in the progress message
    branchesmap -- user supplied branch name mapping
    sob         -- when true an 'author' line derived from Signed-off-by
                   lines is written in addition to the committer
    brmap       -- cache of hg branch name -> sanitized git branch name
    fn_encoding -- if set, file names are re-encoded from it to UTF-8
    plugins     -- dict; callables under 'commit_message_filters' may
                   rewrite branch, parents, author, committer and desc
    """
    def get_branchname(name):
        # Sanitize every hg branch name only once; brmap remembers it.
        if name not in brmap:
            brmap[name] = sanitize_name(name, "branch", branchesmap)
        return brmap[name]

    changeset = get_changeset(ui, repo, revision, authors, encoding)
    (revnode, _, user, (time, timezone), files, desc, branch, _) = changeset
    if repo[revnode].hidden():
        return count

    branch = get_branchname(branch)

    parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
    author = get_author(desc, user, authors)
    hg_hash = revsymbol(repo, b"%d" % revision).hex()

    if plugins and plugins['commit_message_filters']:
        commit_data = {
            'branch': branch,
            'parents': parents,
            'author': author,
            'desc': desc,
            'revision': revision,
            'hg_hash': hg_hash,
            'committer': user,
        }
        for message_filter in plugins['commit_message_filters']:
            message_filter(commit_data)
        branch = commit_data['branch']
        parents = commit_data['parents']
        author = commit_data['author']
        user = commit_data['committer']
        desc = commit_data['desc']

    parent_count = len(parents)

    # A parentless revision other than r0 starts a fresh line of history.
    if parent_count == 0 and revision != 0:
        wr(b'reset refs/heads/%s' % branch)

    wr(b'commit refs/heads/%s' % branch)
    wr(b'mark :%d' % (revision + 1))
    if sob:
        wr(b'author %s %d %s' % (author, time, timezone))
    wr(b'committer %s %d %s' % (user, time, timezone))
    # +1 accounts for the newline wr() appends after the message.
    wr(b'data %d' % (len(desc) + 1))
    wr(desc)
    wr()

    ctx = revsymbol(repo, b"%d" % revision)
    man = ctx.manifest()
    added, changed, removed, kind = [], [], [], ''

    if parent_count == 0:
        # first revision: feed in full manifest
        added = sorted(man.keys())
        kind = 'full'
    else:
        wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
        if parent_count == 1:
            # later non-merge revision: feed in changed manifest
            # if we have exactly one parent, just take the changes from the
            # manifest without expensively comparing checksums
            status = repo.status(parents[0], revnode)
            added, changed, removed = status.added, status.modified, status.removed
            kind = 'simple delta'
        else:  # a merge with two parents
            wr(b'merge %s' % revnum_to_revref(parents[1], old_marks))
            # later merge revision: feed in changed manifest
            # for many files comparing checksums is expensive so only do it
            # for merges where we really need it due to hg's revlog logic
            added, changed, removed = get_filechanges(repo, revision, parents, man)
            kind = 'thorough delta'

    stderr_buffer.write(
        b'%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n'
        % (branch, kind.encode(), revision + 1, max, len(added), len(changed), len(removed))
    )

    for filename in removed:
        if fn_encoding:
            filename = filename.decode(fn_encoding).encode('utf8')
        filename = strip_leading_slash(filename)
        if filename == b'.hgsub':
            remove_gitmodules(ctx)
        wr(b'D %s' % filename)

    export_file_contents(ctx, man, added, hgtags, fn_encoding, plugins)
    export_file_contents(ctx, man, changed, hgtags, fn_encoding, plugins)
    wr()

    return checkpoint(count)
def export_note(ui, repo, revision, count, authors, encoding, is_first):
    """Write a git note carrying the hg hash of *revision*.

    The note is committed to refs/notes/hg and attached to the mark
    ``:<revision+1>``. Hidden changesets are skipped and *count* is
    returned unchanged; otherwise checkpoint(count) is returned.

    is_first -- when true, a 'from refs/notes/hg^0' line is written so the
                commit builds on the existing notes ref
    """
    (revnode, _, user, (time, timezone), _, _, _, _) = get_changeset(
        ui, repo, revision, authors, encoding)
    if repo[revnode].hidden():
        return count

    # The parent revisions used to be computed here but were never used.

    wr(b'commit refs/notes/hg')
    wr(b'committer %s %d %s' % (user, time, timezone))
    wr(b'data 0')
    if is_first:
        wr(b'from refs/notes/hg^0')
    wr(b'N inline :%d' % (revision + 1))
    hg_hash = revsymbol(repo, b"%d" % revision).hex()
    wr(b'data %d' % (len(hg_hash)))
    # The hash is the exact payload; the newline is written separately.
    wr_no_nl(hg_hash)
    wr()
    return checkpoint(count)
def export_tags(ui, repo, old_marks, mapping_cache, count, authors, tagsmap):
    """Write a 'reset refs/tags/...' command for every exportable hg tag.

    Tags named b'tip' after sanitizing and tags whose node is missing from
    *mapping_cache* are skipped. Returns *count* advanced through
    checkpoint() once per exported tag.
    """
    for tag, tag_node in repo.tagslist():
        # Remap the tag name
        tag = sanitize_name(tag, "tag", tagsmap)
        # ignore latest revision
        if tag == b'tip':
            continue

        # ignore tags to nodes that are missing (ie, 'in the future')
        hex_node = hexlify(tag_node)
        if hex_node not in mapping_cache:
            stderr_buffer.write(
                b'Tag %s refers to unseen node %s\n' % (tag, hex_node))
            continue

        rev = int(mapping_cache[hex_node])
        ref = revnum_to_revref(rev, old_marks)
        if ref is None:
            stderr_buffer.write(
                b'Failed to find reference for creating tag %s at r%d\n' % (tag, rev)
            )
            continue

        stderr_buffer.write(
            b'Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag, rev, ref))
        wr(b'reset refs/tags/%s' % tag)
        wr(b'from %s' % ref)
        wr()
        count = checkpoint(count)
    return count
def load_mapping(name, filename, mapping_is_raw):
    """Load a ``key=value`` mapping file into a dict of bytes -> bytes.

    name           -- label used in the 'Loaded N <name>' message
    filename       -- path of the mapping file
    mapping_is_raw -- true: lines are raw ``key = value`` pairs;
                      false: both sides are double-quoted strings in which
                      unicode escape sequences are expanded

    Blank lines and lines starting with '#' (including a leading
    '# quoted-escaped-strings' marker) are ignored. Malformed lines are
    reported on stderr and skipped. A missing file is reported and yields
    an empty dict.
    """
    raw_regexp = re.compile(b'^([^=]+)[ ]*=[ ]*(.+)$')
    string_regexp = b'"(((\\.)|(\\")|[^"])*)"'
    quoted_regexp = re.compile(b'^' + string_regexp + b'[ ]*=[ ]*' + string_regexp + b'$')

    def parse_raw_line(line):
        m = raw_regexp.match(line)
        if m is None:
            return None
        return (m.group(1).strip(), m.group(2).strip())

    def process_unicode_escape_sequences(s):
        # Replace unicode escape sequences in the otherwise UTF8-encoded
        # bytestring s with the UTF8-encoded characters they represent. The
        # additional .decode('utf8').encode('unicode-escape') turns any
        # non-ascii characters into escape sequences first, so that the
        # subsequent .decode('unicode-escape') succeeds:
        return s.decode('utf8').encode('unicode-escape').decode('unicode-escape').encode('utf8')

    def parse_quoted_line(line):
        m = quoted_regexp.match(line)
        if m is None:
            return None
        # Group 1 is the key; group 5 is the value, because the first
        # quoted string occupies groups 1-4.
        return (process_unicode_escape_sequences(m.group(1)),
                process_unicode_escape_sequences(m.group(5)))

    parse_line = parse_raw_line if mapping_is_raw else parse_quoted_line

    cache = {}
    if not os.path.exists(filename):
        sys.stderr.write('Could not open mapping file [%s]\n' % (filename))
        return cache

    loaded = 0
    # The context manager closes the file even if parsing raises.
    with open(filename, 'rb') as f:
        for lineno, line in enumerate(f, 1):
            line = line.strip()
            # Skip blank lines and comments; this also covers the
            # '# quoted-escaped-strings' marker on the first line.
            if line == b'' or line[0:1] == b'#':
                continue
            m = parse_line(line)
            if m is None:
                sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename, lineno))
                continue
            # put key:value in cache, key without ^:
            cache[m[0]] = m[1]
            loaded += 1
    sys.stderr.write('Loaded %d %s\n' % (loaded, name))
    return cache
def branchtip(repo, heads):
    '''Return the tipmost branch head in heads that is not closed.

    heads is walked from the last entry backwards and the first head
    whose changelog extras (item 5 of repo.changelog.read()) lack the
    b'close' key is returned. If every head is closed, the last entry of
    heads is returned.

    The key is spelled as bytes: a text 'close' never matches bytes keys
    on Python 3, which made every head look open. On Python 2 both
    spellings are the same string.
    '''
    tip = heads[-1]
    for h in reversed(heads):
        if b'close' not in repo.changelog.read(h)[5]:
            tip = h
            break
    return tip
def verify_heads(ui, repo, cache, force, ignore_unnamed_heads, branchesmap):
    """Check that the git heads still match the state of the last import.

    cache                -- sanitized branch name -> git sha1 as recorded
                            by the previous run
    force                -- when true, mismatches and unnamed heads are
                            reported but do not fail the check
    ignore_unnamed_heads -- when true, unnamed heads are reported but do
                            not fail the check

    Returns True if the export may proceed, False otherwise.
    """
    branches = {}
    for bn, heads in repo.branchmap().iteritems():
        branches[bn] = branchtip(repo, heads)
    l = [(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
    l.sort()

    # get list of hg's branches to verify, don't take all git has
    for _, _, b in l:
        b = get_branch(b)
        sanitized_name = sanitize_name(b, "branch", branchesmap)
        sha1 = get_git_sha1(sanitized_name)
        c = cache.get(sanitized_name)
        if sha1 != c:
            # Either side may be None; bytes %-formatting rejects None on
            # Python 3, so substitute a placeholder for both.
            stderr_buffer.write(
                b'Error: Branch [%s] modified outside hg-fast-export:'
                b'\n%s (repo) != %s (cache)\n'
                % (b,
                   b'<None>' if sha1 is None else sha1,
                   b'<None>' if c is None else c)
            )
            if not force: return False

    # verify that branch has exactly one head
    t = {}
    unnamed_heads = False
    for h in repo.filtered(b'visible').heads():
        (_, _, _, _, _, _, branch, _) = get_changeset(ui, repo, h)
        if t.get(branch, False):
            # A second visible head on an already seen branch.
            stderr_buffer.write(
                b'Error: repository has an unnamed head: hg r%d\n'
                % repo.changelog.rev(h)
            )
            unnamed_heads = True
            if not force and not ignore_unnamed_heads: return False
        t[branch] = True
    if unnamed_heads and not force and not ignore_unnamed_heads: return False
    return True
def hg2git(repourl, m, marksfile, mappingfile, headsfile, tipfile,
           authors=None, branchesmap=None, tagsmap=None,
           sob=False, force=False, ignore_unnamed_heads=False, hgtags=False,
           notes=False, encoding='', fn_encoding='',
           plugins=None):
    """Export the hg repository at *repourl* as a git fast-import stream.

    m          -- maximum hg revision to import; negative means "up to tip"
    marksfile  -- git-fast-import marks of the previous run
    mappingfile, headsfile, tipfile
               -- state files of the previous run (hg->git mapping, git
                  heads, last exported tip)
    authors, branchesmap, tagsmap
               -- optional name mappings (default: empty)
    notes      -- when true, also export the hg hashes as git notes
    plugins    -- optional dict of filter lists passed on to export_commit()

    Returns 0 on success and 1 if verification fails or an hg subrepo is
    missing from submodule_mappings.
    """
    def check_cache(filename, contents):
        if len(contents) == 0:
            sys.stderr.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename)

    # Fresh containers per call instead of shared mutable defaults.
    authors = {} if authors is None else authors
    branchesmap = {} if branchesmap is None else branchesmap
    tagsmap = {} if tagsmap is None else tagsmap
    plugins = {} if plugins is None else plugins

    _max = int(m)

    old_marks = load_cache(marksfile, lambda s: int(s) - 1)
    mapping_cache = load_cache(mappingfile)
    heads_cache = load_cache(headsfile)
    state_cache = load_cache(tipfile)

    if len(state_cache) != 0:
        # A previous run left state behind, so the other caches should hold
        # data too. The heads file is checked against heads_cache; it used
        # to be paired with state_cache, which is never empty here.
        for (name, data) in [(marksfile, old_marks),
                             (mappingfile, mapping_cache),
                             (headsfile, heads_cache)]:
            check_cache(name, data)

    ui, repo = setup_repo(repourl)

    if not verify_heads(ui, repo, heads_cache, force, ignore_unnamed_heads, branchesmap):
        return 1

    try:
        tip = repo.changelog.count()
    except AttributeError:
        tip = len(repo)

    # NOTE(review): if load_cache() returns bytes keys on Python 3, this
    # text key never matches and every run restarts at 0 - confirm
    # against hg2git.load_cache.
    first_rev = int(state_cache.get('tip', 0))
    last_rev = _max
    if _max < 0 or last_rev > tip:
        last_rev = tip

    # Record the hg node -> revnum mapping of every visible revision.
    for rev in range(0, last_rev):
        (revnode, _, _, _, _, _, _, _) = get_changeset(ui, repo, rev, authors)
        if repo[revnode].hidden():
            continue
        mapping_cache[hexlify(revnode)] = b"%d" % rev

    if submodule_mappings:
        # Make sure that all mercurial submodules are registered in the
        # submodule-mappings file
        for rev in range(0, last_rev):
            ctx = revsymbol(repo, b"%d" % rev)
            if ctx.hidden():
                continue
            if ctx.substate:
                for key in ctx.substate:
                    # The subrepo kind is compared as bytes, like the
                    # b'git' test in refresh_gitmodules().
                    if ctx.substate[key][2] == b'hg' and key not in submodule_mappings:
                        stderr_buffer.write(
                            b"Error: %s not found in submodule-mappings\n" % key)
                        return 1

    c = 0
    brmap = {}
    for rev in range(first_rev, last_rev):
        c = export_commit(ui, repo, rev, old_marks, last_rev, c, authors, branchesmap,
                          sob, brmap, hgtags, encoding, fn_encoding,
                          plugins)
    if notes:
        for rev in range(first_rev, last_rev):
            # Only the first note of an incremental run builds on the
            # existing notes ref.
            c = export_note(ui, repo, rev, c, authors, encoding,
                            rev == first_rev and first_rev != 0)

    state_cache['tip'] = last_rev
    state_cache['repo'] = repourl
    save_cache(tipfile, state_cache)
    save_cache(mappingfile, mapping_cache)

    c = export_tags(ui, repo, old_marks, mapping_cache, c, authors, tagsmap)

    sys.stderr.write('Issued %d commands\n' % c)

    return 0
if __name__ == '__main__':
    # Command-line entry point: parse the options, load the optional
    # mapping files and plugins, then run hg2git() and exit with its result.

    def bail(parser, opt):
        """Report a missing mandatory option, print the help and exit(2)."""
        sys.stderr.write('Error: No %s option given\n' % opt)
        parser.print_help()
        sys.exit(2)

    parser = OptionParser()

    parser.add_option("-n", "--no-auto-sanitize", action="store_false",
                      dest="auto_sanitize", default=True,
                      help="Do not perform built-in (broken in many cases) sanitizing of names")
    parser.add_option("-m", "--max", type="int", dest="max",
                      help="Maximum hg revision to import")
    parser.add_option("--mapping", dest="mappingfile",
                      help="File to read last run's hg-to-git SHA1 mapping")
    parser.add_option("--marks", dest="marksfile",
                      help="File to read git-fast-import's marks from")
    parser.add_option("--heads", dest="headsfile",
                      help="File to read last run's git heads from")
    parser.add_option("--status", dest="statusfile",
                      help="File to read status from")
    parser.add_option("-r", "--repo", dest="repourl",
                      help="URL of repo to import")
    parser.add_option("-s", action="store_true", dest="sob",
                      default=False, help="Enable parsing Signed-off-by lines")
    parser.add_option("--hgtags", action="store_true", dest="hgtags",
                      default=False, help="Enable exporting .hgtags files")
    parser.add_option("-A", "--authors", dest="authorfile",
                      help="Read authormap from AUTHORFILE")
    parser.add_option("-B", "--branches", dest="branchesfile",
                      help="Read branch map from BRANCHESFILE")
    parser.add_option("-T", "--tags", dest="tagsfile",
                      help="Read tags map from TAGSFILE")
    parser.add_option("-f", "--force", action="store_true", dest="force",
                      default=False, help="Ignore validation errors by force, implies --ignore-unnamed-heads")
    parser.add_option("--ignore-unnamed-heads", action="store_true", dest="ignore_unnamed_heads",
                      default=False, help="Ignore unnamed head errors")
    parser.add_option("-M", "--default-branch", dest="default_branch",
                      help="Set the default branch")
    parser.add_option("-o", "--origin", dest="origin_name",
                      help="use <name> as namespace to track upstream")
    parser.add_option("--hg-hash", action="store_true", dest="notes",
                      default=False, help="Annotate commits with the hg hash as git notes in the hg namespace")
    parser.add_option("-e", dest="encoding",
                      help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
    parser.add_option("--fe", dest="fn_encoding",
                      help="Assume file names from Mercurial are encoded in <filename_encoding>")
    # NOTE(review): no action= is given, so optparse's default 'store'
    # action applies and this option consumes the following argument as
    # its value - confirm that this is intended for a flag-like option.
    parser.add_option("--mappings-are-raw", dest="raw_mappings", default=False,
                      help="Assume mappings are raw <key>=<value> lines")
    parser.add_option("--filter-contents", dest="filter_contents",
                      help="Pipe contents of each exported file through FILTER_CONTENTS <file-path> <hg-hash> <is-binary>")
    parser.add_option("--plugin-path", type="string", dest="pluginpath",
                      help="Additional search path for plugins ")
    parser.add_option("--plugin", action="append", type="string", dest="plugins",
                      help="Add a plugin with the given init string <name=init>")
    parser.add_option("--subrepo-map", type="string", dest="subrepo_map",
                      help="Provide a mapping file between the subrepository name and the submodule name")

    (options, args) = parser.parse_args()

    auto_sanitize = options.auto_sanitize
    # A negative maximum tells hg2git() to export up to the tip.
    m = -1 if options.max is None else options.max

    # Mandatory options, checked in this order.
    for given, flag in ((options.marksfile, '--marks'),
                        (options.mappingfile, '--mapping'),
                        (options.headsfile, '--heads'),
                        (options.statusfile, '--status'),
                        (options.repourl, '--repo')):
        if given is None:
            bail(parser, flag)

    if options.subrepo_map:
        if not os.path.exists(options.subrepo_map):
            sys.stderr.write('Subrepo mapping file not found %s\n'
                             % options.subrepo_map)
            sys.exit(1)
        submodule_mappings = load_mapping('subrepo mappings',
                                          options.subrepo_map, False)

    a = {}
    if options.authorfile is not None:
        a = load_mapping('authors', options.authorfile, options.raw_mappings)

    b = {}
    if options.branchesfile is not None:
        b = load_mapping('branches', options.branchesfile, options.raw_mappings)

    t = {}
    if options.tagsfile is not None:
        t = load_mapping('tags', options.tagsfile, options.raw_mappings)

    if options.default_branch is not None:
        set_default_branch(options.default_branch)

    if options.origin_name is not None:
        set_origin_name(options.origin_name)

    encoding = options.encoding if options.encoding is not None else ''
    # File names use the commit encoding unless --fe overrides it.
    fn_encoding = options.fn_encoding if options.fn_encoding is not None else encoding

    plugins = []
    if options.plugins is not None:
        plugins += options.plugins

    # --filter-contents is a shorthand for the shell filter plugin.
    if options.filter_contents is not None:
        plugins += ['shell_filter_file_contents=' + options.filter_contents]

    plugins_dict = {
        'commit_message_filters': [],
        'file_data_filters': [],
    }

    if plugins and options.pluginpath:
        sys.stderr.write('Using additional plugin path: ' + options.pluginpath + '\n')

    for plugin_spec in plugins:
        # "<name>=<init string>"; everything after the first '=' is the
        # init string.
        name, _, opts = plugin_spec.partition('=')
        info = pluginloader.get_plugin(name, options.pluginpath)
        sys.stderr.write('Loaded plugin ' + info['name'] + ' from path: ' + info['path'] + ' with opts: ' + opts + '\n')
        plugin = pluginloader.load_plugin(info).build_filter(opts)
        if hasattr(plugin, 'file_data_filter') and callable(plugin.file_data_filter):
            plugins_dict['file_data_filters'].append(plugin.file_data_filter)
        if hasattr(plugin, 'commit_message_filter') and callable(plugin.commit_message_filter):
            plugins_dict['commit_message_filters'].append(plugin.commit_message_filter)

    sys.exit(hg2git(options.repourl, m, options.marksfile, options.mappingfile,
                    options.headsfile, options.statusfile,
                    authors=a, branchesmap=b, tagsmap=t,
                    sob=options.sob, force=options.force,
                    ignore_unnamed_heads=options.ignore_unnamed_heads,
                    hgtags=options.hgtags,
                    notes=options.notes, encoding=encoding, fn_encoding=fn_encoding,
                    plugins=plugins_dict))