#!/usr/bin/env python2

# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>

from mercurial import node
from hg2git import setup_repo,fixup_user,get_branch,get_changeset
from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
from optparse import OptionParser
import re
import sys
import os
from binascii import hexlify
import pluginloader
PY2 = sys.version_info.major == 2
if PY2:
  str = unicode

if PY2 and sys.platform == "win32":
  # On Windows, sys.stdout is initially opened in text mode, which means that
  # when a LF (\n) character is written to sys.stdout, it will be converted
  # into CRLF (\r\n). That makes git blow up, so use this platform-specific
  # code to change the mode of sys.stdout to binary.
  import msvcrt
  msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)

# silly regex to catch Signed-off-by lines in log message
sob_re=re.compile(b'^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
cfg_checkpoint_count=0
# write some progress message every this many file contents written
cfg_export_boundary=1000

subrepo_cache={}
submodule_mappings=None

# True if fast export should automatically try to sanitize
# author/branch/tag names.
auto_sanitize = None

stdout_buffer = sys.stdout if PY2 else sys.stdout.buffer
stderr_buffer = sys.stderr if PY2 else sys.stderr.buffer

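# Map Mercurial file flags to the git file modes used in fast-import 'M'
# commands: 'l' (symlink) -> 120000, 'x' (executable) -> 100755, and a plain
# file -> 100644.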
def gitmode(flags):
  return b'l' in flags and b'120000' or b'x' in flags and b'100755' or b'100644'

def wr_no_nl(msg=b''):
  assert isinstance(msg, bytes)
  if msg:
    stdout_buffer.write(msg)

def wr(msg=b''):
  wr_no_nl(msg + b'\n')
  #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))

def wr_data(data):
  wr(b'data %d' % (len(data)))
  wr(data)

def checkpoint(count):
  count=count+1
  if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0:
    stderr_buffer.write(b"Checkpoint after %d commits\n" % count)
    wr(b'checkpoint')
    wr()
  return count

def revnum_to_revref(rev, old_marks):
  """Convert an hg revnum to a git-fast-import rev reference (an SHA1
  or a mark)"""
  return old_marks.get(rev) or b':%d' % (rev+1)

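# split_dict()/get_filechanges() compare the manifests of a revision and its
# parents and classify files into three sets: l (added), c (changed) and
# r (removed), which export_commit() later turns into 'M' and 'D' commands.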
def file_mismatch(f1,f2):
  """See if two revisions of a file are not equal."""
  return node.hex(f1)!=node.hex(f2)

def split_dict(dleft,dright,l=[],c=[],r=[],match=file_mismatch):
  """Loop over our repository and find all changed and missing files."""
  for left in dleft.keys():
    right=dright.get(left,None)
    if right==None:
      # we have the file but our parent hasn't: add to left set
      l.append(left)
    elif match(dleft[left],right) or gitmode(dleft.flags(left))!=gitmode(dright.flags(left)):
      # we have it but checksums mismatch: add to center set
      c.append(left)
  for right in dright.keys():
    left=dleft.get(right,None)
    if left==None:
      # if parent has file but we don't: add to right set
      r.append(right)
    # change is already handled when comparing child against parent
  return l,c,r

def get_filechanges(repo,revision,parents,mleft):
  """Given some repository and revision, find all changed/deleted files."""
  l,c,r=[],[],[]
  for p in parents:
    if p<0: continue
    mright=repo[p].manifest()
    l,c,r=split_dict(mleft,mright,l,c,r)
  l.sort()
  c.sort()
  r.sort()
  return l,c,r

def get_author(logmessage,committer,authors):
  """As git distinguishes between the author and the committer of a patch, try
  to extract the author by detecting Signed-off-by lines.

  This walks from the end of the log message towards the top skipping
  empty lines. Upon the first non-empty line, it walks all Signed-off-by
  lines upwards to find the first one. For that (if found), it extracts
  authorship information the usual way (authors table, cleaning, etc.)

  If no Signed-off-by line is found, this defaults to the committer.

  This may sound stupid (and it somewhat is), but in log messages we
  may accidentally have lines in the middle starting with
  "Signed-off-by: foo" and thus matching our detection regex. Prevent
  that."""

  loglines=logmessage.split(b'\n')
  i=len(loglines)
  # from tail walk to top skipping empty lines
  while i>=0:
    i-=1
    if len(loglines[i].strip())==0: continue
    break
  if i>=0:
    # walk further upwards to find first sob line, store in 'first'
    first=None
    while i>=0:
      m=sob_re.match(loglines[i])
      if m==None: break
      first=m
      i-=1
    # if the last non-empty line matches our Signed-off-by regex: extract username
    if first!=None:
      r=fixup_user(first.group(1),authors)
      return r
  return committer

def remove_gitmodules(ctx):
  """Removes all submodules of ctx parents"""
  # Removing all submodules coming from all parents is safe, as the submodules
  # of the current commit will be re-added below. A possible optimization would
  # be to only remove the submodules of the first parent.
  for parent_ctx in ctx.parents():
    for submodule in parent_ctx.substate.keys():
      wr(b'D %s' % submodule)
  wr(b'D .gitmodules')

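# The refresh_*_submodule() helpers emit a gitlink entry ('M 160000 <sha> <path>')
# for a subrepository and return the matching .gitmodules stanza. Git subrepos
# can use their revision hash directly; hg subrepos are translated through the
# hg2git mapping/marks caches of the already-converted submodule repository.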
def refresh_git_submodule(name,subrepo_info):
  wr(b'M 160000 %s %s' % (subrepo_info[1],name))
  stderr_buffer.write(
    b"Adding/updating submodule %s, revision %s\n" % (name, subrepo_info[1])
  )
  return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name, name, subrepo_info[0])

def refresh_hg_submodule(name,subrepo_info):
  gitRepoLocation=submodule_mappings[name] + b"/.git"

  # Populate the cache to map mercurial revision to git revision
  if not name in subrepo_cache:
    subrepo_cache[name]=(load_cache(gitRepoLocation+b"/hg2git-mapping"),
                         load_cache(gitRepoLocation+b"/hg2git-marks",
                                    lambda s: int(s)-1))

  (mapping_cache,marks_cache)=subrepo_cache[name]
  subrepo_hash=subrepo_info[1]
  if subrepo_hash in mapping_cache:
    revnum=mapping_cache[subrepo_hash]
    gitSha=marks_cache[int(revnum)]
    wr(b'M 160000 %s %s' % (gitSha,name))
    stderr_buffer.write(
      b"Adding/updating submodule %s, revision %s->%s\n"
      % (name, subrepo_hash, gitSha)
    )
    return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name,
                                                             submodule_mappings[name])
  else:
    stderr_buffer.write(
      b"Warning: Could not find hg revision %s for %s in git %s\n"
      % (subrepo_hash, name, gitRepoLocation,)
    )
    return b''

def refresh_gitmodules(ctx):
  """Updates list of ctx submodules according to .hgsubstate file"""
  remove_gitmodules(ctx)
  gitmodules=b""
  # Create the .gitmodules file and all submodules
  for name,subrepo_info in ctx.substate.items():
    if subrepo_info[2]==b'git':
      gitmodules+=refresh_git_submodule(name,subrepo_info)
    elif submodule_mappings and name in submodule_mappings:
      gitmodules+=refresh_hg_submodule(name,subrepo_info)

  if len(gitmodules):
    wr(b'M 100644 inline .gitmodules')
    wr_data(gitmodules)

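# export_file_contents() streams each listed file as an inline 'M' command.
# Plugins registered as 'file_data_filters' receive a dict with 'filename',
# 'file_ctx' and 'data' keys and may rewrite any of them before the blob is
# written.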
def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
  count=0
  max=len(files)
  is_submodules_refreshed=False
  for file in files:
    if not is_submodules_refreshed and (file==b'.hgsub' or file==b'.hgsubstate'):
      is_submodules_refreshed=True
      refresh_gitmodules(ctx)
    # Skip .hgtags files. They only get us in trouble.
    if not hgtags and file == b".hgtags":
      stderr_buffer.write(b'Skip %s\n' % file)
      continue
    if encoding:
      filename=file.decode(encoding).encode('utf8')
    else:
      filename=file
    if b'.git' in filename.split(b'/'): # Even on Windows, the path separator is / here.
      stderr_buffer.write(
        b'Ignoring file %s which cannot be tracked by git\n' % filename
      )
      continue
    file_ctx=ctx.filectx(file)
    d=file_ctx.data()

    if plugins and plugins['file_data_filters']:
      file_data = {'filename':filename,'file_ctx':file_ctx,'data':d}
      for filter in plugins['file_data_filters']:
        filter(file_data)
      d=file_data['data']
      filename=file_data['filename']
      file_ctx=file_data['file_ctx']

    wr(b'M %s inline %s' % (gitmode(manifest.flags(file)),
                            strip_leading_slash(filename)))
    wr(b'data %d' % len(d)) # had some trouble with size()
    wr(d)
    count+=1
    if count%cfg_export_boundary==0:
      stderr_buffer.write(b'Exported %d/%d files\n' % (count,max))
  if max>cfg_export_boundary:
    stderr_buffer.write(b'Exported %d/%d files\n' % (count,max))

def sanitize_name(name,what="branch", mapping={}):
  """Sanitize input roughly according to git-check-ref-format(1)"""

  # NOTE: Do not update this transform to work around
  # incompatibilities on your platform. If you change it and it starts
  # modifying names which previously were not touched it will break
  # preexisting setups which are doing incremental imports.

  # Fast-export tries to not inflict arbitrary naming policy on the
  # user, instead it aims to provide mechanisms allowing the user to
  # apply their own policy. Therefore do not add a transform which can
  # already be implemented with the -B and -T options to mangle branch
  # and tag names. If you have a source repository where this is too
  # much work to do manually, write a tool that does it for you.

  def dot(name):
    if not name: return name
    if name[0:1] == b'.': return b'_'+name[1:]
    return name

  if not auto_sanitize:
    return mapping.get(name,name)
  n=mapping.get(name,name)
  p=re.compile(b'([\\[ ~^:?\\\\*]|\.\.)')
  n=p.sub(b'_', n)
  if n[-1:] in (b'/', b'.'): n=n[:-1]+b'_'
  n=b'/'.join([dot(s) for s in n.split(b'/')])
  p=re.compile(b'_+')
  n=p.sub(b'_', n)

  if n!=name:
    stderr_buffer.write(
      b'Warning: sanitized %s [%s] to [%s]\n' % (what.encode(), name, n)
    )
  return n

def strip_leading_slash(filename):
  if filename[0:1] == b'/':
    return filename[1:]
  return filename

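# export_commit() converts one hg revision into a git-fast-import 'commit'
# command: a mark (revision+1), author/committer lines, the log message as a
# data block, 'from'/'merge' references to the parent revisions, and finally
# the file deletions and modifications for that revision.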
def export_commit(ui,repo,revision,old_marks,max,count,authors,
                  branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
                  plugins={}):
  def get_branchname(name):
    if name in brmap:
      return brmap[name]
    n=sanitize_name(name, "branch", branchesmap)
    brmap[name]=n
    return n

  ctx=repo[revision]

  (_,user,(time,timezone),files,desc,branch,extra)=get_changeset(ui,repo,revision,authors,encoding)
  if ctx.hidden():
    return count

  branch=get_branchname(branch)

  parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
  author = get_author(desc,user,authors)
  hg_hash=ctx.hex()

  if plugins and plugins['commit_message_filters']:
    commit_data = {'branch': branch, 'parents': parents,
                   'author': author, 'desc': desc,
                   'revision': revision, 'hg_hash': hg_hash,
                   'committer': user, 'extra': extra}
    for filter in plugins['commit_message_filters']:
      filter(commit_data)
    branch = commit_data['branch']
    parents = commit_data['parents']
    author = commit_data['author']
    user = commit_data['committer']
    desc = commit_data['desc'] + b'\n'

  if len(parents)==0 and revision != 0:
    wr(b'reset refs/heads/%s' % branch)

  wr(b'commit refs/heads/%s' % branch)
  wr(b'mark :%d' % (revision+1))
  if sob:
    wr(b'author %s %d %s' % (author,time,timezone))
  wr(b'committer %s %d %s' % (user,time,timezone))
  wr_data(desc)

  man=ctx.manifest()
  added,changed,removed,type=[],[],[],''

  if len(parents) == 0:
    # first revision: feed in full manifest
    added=man.keys()
    added.sort()
    type='full'
  else:
    wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
    if len(parents) == 1:
      # later non-merge revision: feed in changed manifest
      # if we have exactly one parent, just take the changes from the
      # manifest without expensively comparing checksums
      f=repo.status(parents[0],revision)
      added,changed,removed=f.added,f.modified,f.removed
      type='simple delta'
    else: # a merge with two parents
      wr(b'merge %s' % revnum_to_revref(parents[1], old_marks))
      # later merge revision: feed in changed manifest
      # for many files comparing checksums is expensive so only do it for
      # merges where we really need it due to hg's revlog logic
      added,changed,removed=get_filechanges(repo,revision,parents,man)
      type='thorough delta'

  stderr_buffer.write(
    b'%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n'
    % (branch, type.encode(), revision + 1, max, len(added), len(changed), len(removed))
  )

  for filename in removed:
    if fn_encoding:
      filename=filename.decode(fn_encoding).encode('utf8')
    filename=strip_leading_slash(filename)
    if filename==b'.hgsub':
      remove_gitmodules(ctx)
    wr(b'D %s' % filename)

  export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins)
  export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins)
  wr()

  return checkpoint(count)

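# export_note() records the original hg changeset hash as a git note on
# refs/notes/hg (enabled with --hg-hash).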
def export_note(ui,repo,revision,count,authors,encoding,is_first):
  ctx = repo[revision]

  (_,user,(time,timezone),_,_,_,_)=get_changeset(ui,repo,revision,authors,encoding)
  if ctx.hidden():
    return count

  wr(b'commit refs/notes/hg')
  wr(b'committer %s %d %s' % (user,time,timezone))
  wr(b'data 0')
  if is_first:
    wr(b'from refs/notes/hg^0')
  wr(b'N inline :%d' % (revision+1))
  hg_hash=ctx.hex()
  wr_data(hg_hash)
  wr()
  return checkpoint(count)

def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap):
  l=repo.tagslist()
  for tag,node in l:
    # Remap the tag name
    tag=sanitize_name(tag,"tag",tagsmap)
    # ignore latest revision
    if tag==b'tip': continue
    # ignore tags to nodes that are missing (ie, 'in the future')
    if hexlify(node) not in mapping_cache:
      stderr_buffer.write(b'Tag %s refers to unseen node %s\n' % (tag, hexlify(node)))
      continue

    rev=int(mapping_cache[hexlify(node)])

    ref=revnum_to_revref(rev, old_marks)
    if ref==None:
      stderr_buffer.write(
        b'Failed to find reference for creating tag %s at r%d\n' % (tag, rev)
      )
      continue
    stderr_buffer.write(b'Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag, rev, ref))
    wr(b'reset refs/tags/%s' % tag)
    wr(b'from %s' % ref)
    wr()
    count=checkpoint(count)
  return count

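# load_mapping() reads an author/branch/tag/subrepo mapping file. By default
# every line is expected in the quoted form "<key>"="<value>" (with backslash
# escape sequences); with --mappings-are-raw the plain <key>=<value> form is
# parsed instead. Empty lines and lines starting with '#' are ignored.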
def load_mapping(name, filename, mapping_is_raw):
  raw_regexp=re.compile(b'^([^=]+)[ ]*=[ ]*(.+)$')
  string_regexp=b'"(((\\.)|(\\")|[^"])*)"'
  quoted_regexp=re.compile(b'^'+string_regexp+b'[ ]*=[ ]*'+string_regexp+b'$')

  def parse_raw_line(line):
    m=raw_regexp.match(line)
    if m==None:
      return None
    return (m.group(1).strip(), m.group(2).strip())

  def process_unicode_escape_sequences(s):
    # Replace unicode escape sequences in the otherwise UTF8-encoded bytestring s with
    # the UTF8-encoded characters they represent. We need to do an additional
    # .decode('utf8').encode('ascii', 'backslashreplace') to convert any non-ascii
    # characters into their escape sequences so that the subsequent
    # .decode('unicode-escape') succeeds:
    return (
      s.decode('utf8')
      .encode('ascii', 'backslashreplace')
      .decode('unicode-escape')
      .encode('utf8')
    )

  def parse_quoted_line(line):
    m=quoted_regexp.match(line)
    if m==None:
      return
    return (process_unicode_escape_sequences(m.group(1)),
            process_unicode_escape_sequences(m.group(5)))

  cache={}
  if not os.path.exists(filename):
    sys.stderr.write('Could not open mapping file [%s]\n' % (filename))
    return cache
  f=open(filename,'rb')
  l=0
  a=0
  for line in f.readlines():
    l+=1
    line=line.strip()
    if l==1 and line[0:1]==b'#' and line==b'# quoted-escaped-strings':
      continue
    elif line==b'' or line[0:1]==b'#':
      continue
    m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line)
    if m==None:
      sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
      continue
    # put key:value in cache, key without ^:
    cache[m[0]]=m[1]
    a+=1
  f.close()
  sys.stderr.write('Loaded %d %s\n' % (a, name))
  return cache

def branchtip(repo, heads):
  '''return the tipmost branch head in heads'''
  tip = heads[-1]
  for h in reversed(heads):
    if 'close' not in repo.changelog.read(h)[5]:
      tip = h
      break
  return tip

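# verify_heads() protects incremental imports: it refuses to continue if a git
# branch head no longer matches what the previous run recorded in the heads
# cache (unless --force is given), and it reports unnamed heads, i.e. multiple
# heads on one hg branch (unless --force or --ignore-unnamed-heads is given).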
def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
  branches={}
  for bn, heads in repo.branchmap().iteritems():
    branches[bn] = branchtip(repo, heads)
  l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
  l.sort()

  # get list of hg's branches to verify, don't take all git has
  for _,_,b in l:
    b=get_branch(b)
    sanitized_name=sanitize_name(b,"branch",branchesmap)
    sha1=get_git_sha1(sanitized_name)
    c=cache.get(sanitized_name)
    if not c and sha1:
      stderr_buffer.write(
        b'Error: Branch [%s] already exists and was not created by hg-fast-export, '
        b'export would overwrite unrelated branch\n' % b)
      if not force: return False
    elif sha1!=c:
      stderr_buffer.write(
        b'Error: Branch [%s] modified outside hg-fast-export:'
        b'\n%s (repo) != %s (cache)\n' % (b, b'<None>' if sha1 is None else sha1, c)
      )
      if not force: return False

  # verify that branch has exactly one head
  t={}
  unnamed_heads=False
  for h in repo.filtered(b'visible').heads():
    branch=get_branch(repo[h].branch())
    if t.get(branch,False):
      stderr_buffer.write(
        b'Error: repository has an unnamed head: hg r%d\n'
        % repo.changelog.rev(h)
      )
      unnamed_heads=True
      if not force and not ignore_unnamed_heads: return False
    t[branch]=True
  if unnamed_heads and not force and not ignore_unnamed_heads: return False
  return True

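# hg2git() is the driver: it loads the caches from the previous run, verifies
# the git heads, maps every visible revision to its number, exports commits
# from the cached tip up to --max (plus optional notes and tags), and finally
# saves the state needed for the next incremental run.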
def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
           authors={},branchesmap={},tagsmap={},
           sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,notes=False,encoding='',fn_encoding='',
           plugins={}):
  def check_cache(filename, contents):
    if len(contents) == 0:
      sys.stderr.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename)

  _max=int(m)

  old_marks=load_cache(marksfile,lambda s: int(s)-1)
  mapping_cache=load_cache(mappingfile)
  heads_cache=load_cache(headsfile)
  state_cache=load_cache(tipfile)

  if len(state_cache) != 0:
    for (name, data) in [(marksfile, old_marks),
                         (mappingfile, mapping_cache),
                         (headsfile, state_cache)]:
      check_cache(name, data)

  ui,repo=setup_repo(repourl)

  if not verify_heads(ui,repo,heads_cache,force,ignore_unnamed_heads,branchesmap):
    return 1

  try:
    tip=repo.changelog.count()
  except AttributeError:
    tip=len(repo)

  min=int(state_cache.get(b'tip',0))
  max=_max
  if _max<0 or max>tip:
    max=tip

  for rev in range(0,max):
    ctx=repo[rev]
    if ctx.hidden():
      continue
    mapping_cache[ctx.hex()] = b"%d" % rev

  if submodule_mappings:
    # Make sure that all mercurial submodules are registered in the submodule-mappings file
    for rev in range(0,max):
      ctx=repo[rev]
      if ctx.hidden():
        continue
      if ctx.substate:
        for key in ctx.substate:
          if ctx.substate[key][2]=='hg' and key not in submodule_mappings:
            sys.stderr.write("Error: %s not found in submodule-mappings\n" % (key))
            return 1

  c=0
  brmap={}
  for rev in range(min,max):
    c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
                    sob,brmap,hgtags,encoding,fn_encoding,
                    plugins)
  if notes:
    for rev in range(min,max):
      c=export_note(ui,repo,rev,c,authors, encoding, rev == min and min != 0)

  state_cache[b'tip']=max
  state_cache[b'repo']=repourl
  save_cache(tipfile,state_cache)
  save_cache(mappingfile,mapping_cache)

  c=export_tags(ui,repo,old_marks,mapping_cache,c,authors,tagsmap)

  sys.stderr.write('Issued %d commands\n' % c)

  return 0

if __name__=='__main__':
  def bail(parser,opt):
    sys.stderr.write('Error: No %s option given\n' % opt)
    parser.print_help()
    sys.exit(2)

  parser=OptionParser()

  parser.add_option("-n", "--no-auto-sanitize",action="store_false",
                    dest="auto_sanitize",default=True,
                    help="Do not perform built-in (broken in many cases) sanitizing of names")
  parser.add_option("-m","--max",type="int",dest="max",
                    help="Maximum hg revision to import")
  parser.add_option("--mapping",dest="mappingfile",
                    help="File to read last run's hg-to-git SHA1 mapping")
  parser.add_option("--marks",dest="marksfile",
                    help="File to read git-fast-import's marks from")
  parser.add_option("--heads",dest="headsfile",
                    help="File to read last run's git heads from")
  parser.add_option("--status",dest="statusfile",
                    help="File to read status from")
  parser.add_option("-r","--repo",dest="repourl",
                    help="URL of repo to import")
  parser.add_option("-s",action="store_true",dest="sob",
                    default=False,help="Enable parsing Signed-off-by lines")
  parser.add_option("--hgtags",action="store_true",dest="hgtags",
                    default=False,help="Enable exporting .hgtags files")
  parser.add_option("-A","--authors",dest="authorfile",
                    help="Read authormap from AUTHORFILE")
  parser.add_option("-B","--branches",dest="branchesfile",
                    help="Read branch map from BRANCHESFILE")
  parser.add_option("-T","--tags",dest="tagsfile",
                    help="Read tags map from TAGSFILE")
  parser.add_option("-f","--force",action="store_true",dest="force",
                    default=False,help="Ignore validation errors by force, implies --ignore-unnamed-heads")
  parser.add_option("--ignore-unnamed-heads",action="store_true",dest="ignore_unnamed_heads",
                    default=False,help="Ignore unnamed head errors")
  parser.add_option("-M","--default-branch",dest="default_branch",
                    help="Set the default branch")
  parser.add_option("-o","--origin",dest="origin_name",
                    help="use <name> as namespace to track upstream")
  parser.add_option("--hg-hash",action="store_true",dest="notes",
                    default=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
  parser.add_option("-e",dest="encoding",
                    help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
  parser.add_option("--fe",dest="fn_encoding",
                    help="Assume file names from Mercurial are encoded in <filename_encoding>")
  parser.add_option("--mappings-are-raw",dest="raw_mappings", default=False,
                    help="Assume mappings are raw <key>=<value> lines")
  parser.add_option("--filter-contents",dest="filter_contents",
                    help="Pipe contents of each exported file through FILTER_CONTENTS <file-path> <hg-hash> <is-binary>")
  parser.add_option("--plugin-path", type="string", dest="pluginpath",
                    help="Additional search path for plugins")
  parser.add_option("--plugin", action="append", type="string", dest="plugins",
                    help="Add a plugin with the given init string <name=init>")
  parser.add_option("--subrepo-map", type="string", dest="subrepo_map",
                    help="Provide a mapping file between the subrepository name and the submodule name")

  (options,args)=parser.parse_args()

  m=-1
  auto_sanitize = options.auto_sanitize
  if options.max!=None: m=options.max

  if options.marksfile==None: bail(parser,'--marks')
  if options.mappingfile==None: bail(parser,'--mapping')
  if options.headsfile==None: bail(parser,'--heads')
  if options.statusfile==None: bail(parser,'--status')
  if options.repourl==None: bail(parser,'--repo')

  if options.subrepo_map:
    if not os.path.exists(options.subrepo_map):
      sys.stderr.write('Subrepo mapping file not found %s\n'
                       % options.subrepo_map)
      sys.exit(1)
    submodule_mappings=load_mapping('subrepo mappings',
                                    options.subrepo_map,False)

  a={}
  if options.authorfile!=None:
    a=load_mapping('authors', options.authorfile, options.raw_mappings)

  b={}
  if options.branchesfile!=None:
    b=load_mapping('branches', options.branchesfile, options.raw_mappings)

  t={}
  if options.tagsfile!=None:
    t=load_mapping('tags', options.tagsfile, options.raw_mappings)

  if options.default_branch!=None:
    set_default_branch(options.default_branch)

  if options.origin_name!=None:
    set_origin_name(options.origin_name)

  encoding=''
  if options.encoding!=None:
    encoding=options.encoding

  fn_encoding=encoding
  if options.fn_encoding!=None:
    fn_encoding=options.fn_encoding

  plugins=[]
  if options.plugins!=None:
    plugins+=options.plugins

  if options.filter_contents!=None:
    plugins+=['shell_filter_file_contents='+options.filter_contents]

  plugins_dict={}
  plugins_dict['commit_message_filters']=[]
  plugins_dict['file_data_filters']=[]

  if plugins and options.pluginpath:
    sys.stderr.write('Using additional plugin path: ' + options.pluginpath + '\n')

  for plugin in plugins:
    split = plugin.split('=')
    name, opts = split[0], '='.join(split[1:])
    i = pluginloader.get_plugin(name,options.pluginpath)
    sys.stderr.write('Loaded plugin ' + i['name'] + ' from path: ' + i['path'] + ' with opts: ' + opts + '\n')
    plugin = pluginloader.load_plugin(i).build_filter(opts)
    if hasattr(plugin,'file_data_filter') and callable(plugin.file_data_filter):
      plugins_dict['file_data_filters'].append(plugin.file_data_filter)
    if hasattr(plugin, 'commit_message_filter') and callable(plugin.commit_message_filter):
      plugins_dict['commit_message_filters'].append(plugin.commit_message_filter)

  sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,
                  options.headsfile, options.statusfile,
                  authors=a,branchesmap=b,tagsmap=t,
                  sob=options.sob,force=options.force,
                  ignore_unnamed_heads=options.ignore_unnamed_heads,
                  hgtags=options.hgtags,
                  notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,
                  plugins=plugins_dict))

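# A minimal usage sketch: this script writes a git-fast-import command stream
# to stdout and is normally driven by a wrapper that supplies the cache file
# locations. The paths below are illustrative assumptions, not fixed names:
#
#   hg-fast-export.py --repo /path/to/hg-repo \
#       --marks .git/hg2git-marks --mapping .git/hg2git-mapping \
#       --heads .git/hg2git-heads --status .git/hg2git-state \
#     | git fast-import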