3 # Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial
import repo
,hg
,cmdutil
,util
,ui
,revlog
,node
7 from hg2git
import setup_repo
,fixup_user
,get_branch
,get_changeset
,load_cache
,save_cache
,get_git_sha1
8 from tempfile
import mkstemp
9 from optparse
import OptionParser
# silly regex to catch Signed-off-by lines in log message;
# group(1) is everything following "Signed-off-by: " on that line
sob_re = re.compile(r'^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
cfg_checkpoint_count = 0
# write some progress message every this many file contents written
cfg_export_boundary = 1000
22 return x
and '100755' or '100644'
26 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
28 def checkpoint(count
):
30 if cfg_checkpoint_count
>0 and count
%cfg_checkpoint
_count
==0:
31 sys
.stderr
.write("Checkpoint after %d commits\n" % count
)
def get_parent_mark(parent, marks):
    """Get the mark for some parent.

    Marks are numbered ``revision + 1``, so the lookup key is
    ``str(parent + 1)``.

    parent -- revision number of the parent
    marks  -- mapping consulted with string keys (presumably the marks
              cache holding SHA1s from earlier runs -- confirm against
              the caller)

    Returns the value stored in ``marks`` under that key if there is one,
    otherwise the ':%d' mark syntax referring to a commit written in the
    current session.
    """
    mark = parent + 1
    return marks.get(str(mark), ':%d' % mark)
43 """See if two revisions of a file are not equal."""
44 return node
.hex(f1
)!=node
.hex(f2
)
46 def outer_set(dleft
,dright
,l
,c
,r
):
47 """Loop over our repository and find all changed and missing files."""
48 for left
in dleft
.keys():
49 right
=dright
.get(left
,None)
51 # we have the file but our parent hasn't: add to left set
53 elif mismatch(dleft
[left
],right
):
54 # we have it but checksums mismatch: add to center set
56 for right
in dright
.keys():
57 left
=dleft
.get(right
,None)
59 # if parent has file but we don't: add to right set
61 # change is already handled when comparing child against parent
64 def get_filechanges(repo
,revision
,parents
,mleft
):
65 """Given some repository and revision, find all changed/deleted files."""
69 mright
=repo
.changectx(p
).manifest()
74 l
,c
,r
=outer_set(mleft
,mright
,l
,c
,r
)
77 def get_author(logmessage
,committer
,authors
):
78 """As git distinguishes between author and committer of a patch, try to
79 extract author by detecting Signed-off-by lines.
81 This walks from the end of the log message towards the top skipping
82 empty lines. Upon the first non-empty line, it walks all Signed-off-by
83 lines upwards to find the first one. For that (if found), it extracts
84 authorship information the usual way (authors table, cleaning, etc.)
86 If no Signed-off-by line is found, this defaults to the committer.
88 This may sound stupid (and it somehow is), but in log messages we
89 accidentally may have lines in the middle starting with
90 "Signed-off-by: foo" and thus matching our detection regex. Prevent
93 loglines
=logmessage
.split('\n')
95 # from tail walk to top skipping empty lines
98 if len(loglines
[i
].strip())==0: continue
101 # walk further upwards to find first sob line, store in 'first'
104 m
=sob_re
.match(loglines
[i
])
108 # if the last non-empty line matches our Signed-Off-by regex: extract username
110 r
=fixup_user(first
.group(1),authors
)
114 def export_file_contents(ctx
,manifest
,files
):
119 fctx
=ctx
.filectx(file)
121 wr('M %s inline %s' % (gitmode(manifest
.execf(file)),file))
122 wr('data %d' % len(d
)) # had some trouble with size()
125 if count
%cfg_export
_boundary
==0:
126 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
127 if max>cfg_export_boundary
:
128 sys
.stderr
.write('Exported %d/%d files\n' % (count
,max))
130 def is_merge(parents
):
132 for parent
in parents
:
137 def export_commit(ui
,repo
,revision
,marks
,heads
,last
,max,count
,authors
,sob
):
138 (revnode
,_
,user
,(time
,timezone
),files
,desc
,branch
,_
)=get_changeset(ui
,repo
,revision
,authors
)
139 parents
=repo
.changelog
.parentrevs(revision
)
141 wr('commit refs/heads/%s' % branch
)
142 wr('mark :%d' % (revision
+1))
144 wr('author %s %d %s' % (get_author(desc
,user
,authors
),time
,timezone
))
145 wr('committer %s %d %s' % (user
,time
,timezone
))
146 wr('data %d' % (len(desc
)+1)) # wtf?
150 src
=heads
.get(branch
,'')
153 # if we have a cached head, this is an incremental import: initialize it
154 # and kill reference so we won't init it again
157 sys
.stderr
.write('Initializing branch [%s] to parent [%s]\n' %
159 link
=src
# avoid making a merge commit for incremental import
160 elif link
=='' and not heads
.has_key(branch
) and revision
>0:
161 # newly created branch and not the first one: connect to parent
162 tmp
=get_parent_mark(parents
[0],marks
)
164 sys
.stderr
.write('Link new branch [%s] to parent [%s]\n' %
166 link
=tmp
# avoid making a merge commit for branch fork
169 l
=last
.get(branch
,revision
)
171 # 1) as this commit implicitly is the child of the most recent
172 # commit of this branch, ignore this parent
173 # 2) ignore nonexistent parents
175 if p
==l
or p
==revision
or p
<0:
177 tmp
=get_parent_mark(p
,marks
)
178 # if we fork off a branch, don't merge with our parent via 'merge'
179 # as we have 'from' already above
182 sys
.stderr
.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
186 last
[branch
]=revision
188 # we need this later to write out tags
189 marks
[str(revision
)]=':%d'%(revision
+1)
191 ctx
=repo
.changectx(str(revision
))
193 added
,changed
,removed
,type=[],[],[],''
196 # first revision: feed in full manifest
199 elif is_merge(parents
):
200 # later merge revision: feed in changed manifest
201 # for many files comparing checksums is expensive so only do it for
202 # merges where we really need it due to hg's revlog logic
203 added
,changed
,removed
=get_filechanges(repo
,revision
,parents
,man
)
204 type='thorough delta'
206 # later non-merge revision: feed in changed manifest
207 # if we have exactly one parent, just take the changes from the
208 # manifest without expensively comparing checksums
209 f
=repo
.status(repo
.lookup(parents
[0]),revnode
)[:3]
210 added
,changed
,removed
=f
[1],f
[0],f
[2]
213 sys
.stderr
.write('Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
214 (type,revision
+1,max,len(added
),len(changed
),len(removed
)))
216 map(lambda r
: wr('D %s' % r
),removed
)
217 export_file_contents(ctx
,man
,added
+changed
)
220 return checkpoint(count
)
222 def export_tags(ui
,repo
,marks_cache
,start
,end
,count
,authors
):
225 # ignore latest revision
226 if tag
=='tip': continue
227 rev
=repo
.changelog
.rev(node
)
228 # ignore those tags not in our import range
229 if rev
<start
or rev
>=end
: continue
231 ref
=get_parent_mark(rev
,marks_cache
)
233 sys
.stderr
.write('Failed to find reference for creating tag'
234 ' %s at r%d\n' % (tag
,rev
))
236 sys
.stderr
.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag
,rev
,ref
))
237 wr('reset refs/tags/%s' % tag
)
240 count
=checkpoint(count
)
243 def load_authors(filename
):
245 if not os
.path
.exists(filename
):
249 lre
=re
.compile('^([^=]+)[ ]*=[ ]*(.+)$')
250 for line
in f
.readlines():
254 sys
.stderr
.write('Invalid file format in [%s], line %d\n' % (filename
,l
))
256 # put key:value in cache, key without ^:
257 cache
[m
.group(1).strip()]=m
.group(2).strip()
259 sys
.stderr
.write('Loaded %d authors\n' % l
)
262 def verify_heads(ui
,repo
,cache
,force
):
263 branches
=repo
.branchtags()
264 l
=[(-repo
.changelog
.rev(n
), n
, t
) for t
, n
in branches
.items()]
267 # get list of hg's branches to verify, don't take all git has
272 if sha1
!=None and c
!=None:
273 sys
.stderr
.write('Verifying branch [%s]\n' % b
)
275 sys
.stderr
.write('Error: Branch [%s] modified outside hg-fast-export:'
276 '\n%s (repo) != %s (cache)\n' % (b
,sha1
,c
))
277 if not force
: return False
279 # verify that branch has exactly one head
281 for h
in repo
.heads():
282 (_
,_
,_
,_
,_
,_
,branch
,_
)=get_changeset(ui
,repo
,h
)
283 if t
.get(branch
,False):
284 sys
.stderr
.write('Error: repository has at least one unnamed head: hg r%s\n' %
285 repo
.changelog
.rev(h
))
286 if not force
: return False
291 def hg2git(repourl
,m
,marksfile
,headsfile
,tipfile
,authors
={},sob
=False,force
=False):
294 marks_cache
=load_cache(marksfile
)
295 heads_cache
=load_cache(headsfile
)
296 state_cache
=load_cache(tipfile
)
298 ui
,repo
=setup_repo(repourl
)
300 if not verify_heads(ui
,repo
,heads_cache
,force
):
303 tip
=repo
.changelog
.count()
305 min=int(state_cache
.get('tip',0))
312 for rev
in range(min,max):
313 c
=export_commit(ui
,repo
,rev
,marks_cache
,heads_cache
,last
,max,c
,authors
,sob
)
315 c
=export_tags(ui
,repo
,marks_cache
,min,max,c
,authors
)
317 sys
.stderr
.write('Issued %d commands\n' % c
)
319 state_cache
['tip']=max
320 state_cache
['repo']=repourl
321 save_cache(tipfile
,state_cache
)
325 if __name__
=='__main__':
326 def bail(parser
,opt
):
327 sys
.stderr
.write('Error: No %s option given\n' % opt
)
331 parser
=OptionParser()
333 parser
.add_option("-m","--max",type="int",dest
="max",
334 help="Maximum hg revision to import")
335 parser
.add_option("--marks",dest
="marksfile",
336 help="File to read git-fast-import's marks from")
337 parser
.add_option("--heads",dest
="headsfile",
338 help="File to read last run's git heads from")
339 parser
.add_option("--status",dest
="statusfile",
340 help="File to read status from")
341 parser
.add_option("-r","--repo",dest
="repourl",
342 help="URL of repo to import")
343 parser
.add_option("-s",action
="store_true",dest
="sob",
344 default
=False,help="Enable parsing Signed-off-by lines")
345 parser
.add_option("-A","--authors",dest
="authorfile",
346 help="Read authormap from AUTHORFILE")
347 parser
.add_option("-f","--force",action
="store_true",dest
="force",
348 default
=False,help="Ignore validation errors by force")
350 (options
,args
)=parser
.parse_args()
353 if options
.max!=None: m
=options
.max
355 if options
.marksfile
==None: bail(parser
,'--marks')
356 if options
.headsfile
==None: bail(parser
,'--heads')
357 if options
.statusfile
==None: bail(parser
,'--status')
358 if options
.repourl
==None: bail(parser
,'--repo')
361 if options
.authorfile
!=None:
362 a
=load_authors(options
.authorfile
)
364 sys
.exit(hg2git(options
.repourl
,m
,options
.marksfile
,options
.headsfile
,
365 options
.statusfile
,authors
=a
,sob
=options
.sob
,force
=options
.force
))