From 98320356146739154693d37eb906880d9f2ebf5a Mon Sep 17 00:00:00 2001 From: Rocco Rutte Date: Tue, 6 Mar 2007 17:00:25 +0000 Subject: [PATCH] Initial import This is the initial import of 'hg2git' being a converter which feeds a hg repository into git-fast-import(1). --- hg2git.py | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hg2git.sh | 76 +++++++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 hg2git.py create mode 100755 hg2git.sh diff --git a/hg2git.py b/hg2git.py new file mode 100644 index 0000000..0bbbe03 --- /dev/null +++ b/hg2git.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python + +# Copyright (c) 2007 Rocco Rutte +# License: GPLv2 + +"""hg2git.py - A mercurial-to-git filter for git-fast-import(1) +Usage: hg2git.py +""" + +from mercurial import repo,hg,cmdutil,util,ui,revlog +from tempfile import mkstemp +import re +import sys +import os + +# silly regex to see if user field has email address +user_re=re.compile('[^<]+ <[^>]+>$') +# git branch for hg's default 'HEAD' branch +cfg_master='master' +# insert 'checkpoint' command after this many commits +cfg_checkpoint_count=1000 + +def usage(ret): + sys.stderr.write(__doc__) + return ret + +def setup_repo(url): + myui=ui.ui() + return myui,hg.repository(myui,url) + +def get_changeset(ui,repo,revision): + def get_branch(name): + if name=='HEAD': + name=cfg_master + return name + def fixup_user(user): + if user_re.match(user)==None: + if '@' not in user: + return user+' ' + return user+' <'+user+'>' + return user + node=repo.lookup(revision) + (manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(node) + tz="%+03d%02d" % (-timezone / 3600, ((-timezone % 3600) / 60)) + branch=get_branch(extra.get('branch','master')) + return (manifest,fixup_user(user),(time,tz),files,desc,branch,extra) + +def gitmode(x): + return x and '100755' or '100644' + +def wr(msg=''): + print msg + #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n')) + +def checkpoint(count): + count=count+1 + if count%cfg_checkpoint_count==0: + sys.stderr.write("Checkpoint after %d commits\n" % count) + wr('checkpoint') + wr() + return count + +def get_parent_mark(parent,marks): + p=marks.get(str(parent),None) + if p==None: + # if we didn't see parent previously, assume we saw it in this run + p=':%d' % (parent+1) + return p + +def export_commit(ui,repo,revision,marks,heads,last,max,count): + sys.stderr.write('Exporting revision %d (tip %d) as [:%d]\n' % (revision,max,revision+1)) + + (_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision) + parents=repo.changelog.parentrevs(revision) + + # we need this later to write out tags + marks[str(revision)]=':%d'%(revision+1) + + wr('commit refs/heads/%s' % branch) + wr('mark :%d' % (revision+1)) + wr('committer %s %d %s' % (user,time,timezone)) + wr('data %d' % (len(desc)+1)) # wtf? + wr(desc) + wr() + + src=heads.get(branch,'') + link='' + if src!='': + # if we have a cached head, this is an incremental import: initialize it + # and kill reference so we won't init it again + wr('from %s' % src) + heads[branch]='' + elif not heads.has_key(branch) and revision>0: + # newly created branch and not the first one: connect to parent + tmp=get_parent_mark(parents[0],marks) + wr('from %s' % tmp) + sys.stderr.write('Link new branch [%s] to parent [%s]\n' % + (branch,tmp)) + link=tmp # avoid making a merge commit for branch fork + + if parents: + l=last.get(branch,revision) + for p in parents: + # 1) as this commit implicitely is the child of the most recent + # commit of this branch, ignore this parent + # 2) ignore nonexistent parents + # 3) merge otherwise + if p==l or p==revision or p<0: + continue + tmp=get_parent_mark(p,marks) + # if we fork off a branch, don't merge via 'merge' as we have + # 'from' already above + if tmp==link: + continue + sys.stderr.write('Merging branch [%s] with parent [%s] from [r%d]\n' % + (branch,tmp,p)) + wr('merge %s' % tmp) + + last[branch]=revision + heads[branch]='' + + ctx=repo.changectx(str(revision)) + man=ctx.manifest() + + wr('deleteall') + + for f in man.keys(): + fctx=ctx.filectx(f) + d=fctx.data() + wr('M %s inline %s' % (gitmode(man.execf(f)),f)) + wr('data %d' % len(d)) # had some trouble with size() + wr(d) + + wr() + return checkpoint(count) + +def export_tags(ui,repo,cache,count): + l=repo.tagslist() + for tag,node in l: + if tag=='tip': + continue + rev=repo.changelog.rev(node) + ref=cache.get(str(rev),None) + if ref==None: + sys.stderr.write('Failed to find reference for creating tag' + ' %s at r%d\n' % (tag,rev)) + continue + (_,user,(time,timezone),_,desc,branch,_)=get_changeset(ui,repo,rev) + sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref)) + wr('tag %s' % tag) + wr('from %s' % ref) + wr('tagger %s %d %s' % (user,time,timezone)) + msg='hg2git created tag %s for hg revision %d on branch %s on (summary):\n\t%s' % (tag, + rev,branch,desc.split('\n')[0]) + wr('data %d' % (len(msg)+1)) + wr(msg) + wr() + count=checkpoint(count) + return count + +def load_cache(filename): + cache={} + if not os.path.exists(filename): + return cache + f=open(filename,'r') + l=0 + for line in f.readlines(): + l+=1 + fields=line.split(' ') + if fields==None or not len(fields)==2 or fields[0][0]!=':': + sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l)) + continue + # put key:value in cache, key without ^: + cache[fields[0][1:]]=fields[1].split('\n')[0] + f.close() + return cache + +def save_cache(filename,cache): + f=open(filename,'w+') + map(lambda x: f.write(':%s %s\n' % (str(x),str(cache.get(x)))),cache.keys()) + f.close() + +def verify_heads(ui,repo,cache): + def getsha1(branch): + f=open(os.getenv('GIT_DIR','/dev/null')+'/refs/heads/'+branch) + sha1=f.readlines()[0].split('\n')[0] + f.close() + return sha1 + + for b in cache.keys(): + sys.stderr.write('Verifying branch [%s]\n' % b) + sha1=getsha1(b) + c=cache.get(b) + if sha1!=c: + sys.stderr.write('Warning: Branch [%s] modified outside hg2git:' + '\n%s (repo) != %s (cache)\n' % (b,sha1,c)) + return True + +if __name__=='__main__': + if len(sys.argv)!=6: sys.exit(usage(1)) + repourl,m,marksfile,headsfile,tipfile=sys.argv[1:] + _max=int(m) + + marks_cache=load_cache(marksfile) + heads_cache=load_cache(headsfile) + state_cache=load_cache(tipfile) + + ui,repo=setup_repo(repourl) + + if not verify_heads(ui,repo,heads_cache): + sys.exit(1) + + tip=repo.changelog.count() + + min=int(state_cache.get('tip',0)) + max=_max + if _max<0: + max=tip + + c=int(state_cache.get('count',0)) + last={} + for rev in range(min,max): + c=export_commit(ui,repo,rev,marks_cache,heads_cache,last,tip,c) + + c=export_tags(ui,repo,marks_cache,c) + + state_cache['tip']=max + state_cache['count']=c + state_cache['repo']=repourl + save_cache(tipfile,state_cache) diff --git a/hg2git.sh b/hg2git.sh new file mode 100755 index 0000000..c51c1d5 --- /dev/null +++ b/hg2git.sh @@ -0,0 +1,76 @@ +#!/bin/sh + +USAGE='[-m max] repo' +LONG_USAGE='Import hg repository up to either tip or ' +ROOT="`dirname $0`" +REPO="" +MAX="-1" +PFX="hg2git" +SFX_MARKS="marks" +SFX_HEADS="heads" +SFX_STATE="state" + +. git-sh-setup +cd_to_toplevel + +while case "$#" in 0) break ;; esac +do + case "$1" in + -m) + shift + MAX="$1" + ;; + -*) + usage + ;; + *) + break + ;; + esac + shift +done + +if [ "$#" != 1 ] ; then + usage + exit 1 +fi + +REPO="$1" + +# make sure we have a marks cache +if [ ! -f "$GIT_DIR/$PFX-$SFX_MARKS" ] ; then + touch "$GIT_DIR/$PFX-$SFX_MARKS" +fi + +GIT_DIR="$GIT_DIR" python "$ROOT/hg2git.py" \ + "$REPO" \ + "$MAX" \ + "$GIT_DIR/$PFX-$SFX_MARKS" \ + "$GIT_DIR/$PFX-$SFX_HEADS" \ + "$GIT_DIR/$PFX-$SFX_STATE" \ +| git-fast-import --export-marks="$GIT_DIR/$PFX-$SFX_MARKS.tmp" \ +|| die 'Git fast-import failed' + +# move recent marks cache out of the way... +if [ -f "$GIT_DIR/$PFX-$SFX_MARKS" ] ; then + mv "$GIT_DIR/$PFX-$SFX_MARKS" "$GIT_DIR/$PFX-$SFX_MARKS.old" +else + touch "$GIT_DIR/$PFX-$SFX_MARKS.old" +fi + +# ...to create a new merged one +cat "$GIT_DIR/$PFX-$SFX_MARKS.old" "$GIT_DIR/$PFX-$SFX_MARKS.tmp" \ +| uniq > "$GIT_DIR/$PFX-$SFX_MARKS" + +# cleanup +rm -rf "$GIT_DIR/$PFX-$SFX_MARKS.old" "$GIT_DIR/$PFX-$SFX_MARKS.tmp" + +# save SHA1s of current heads for incremental imports +# and connectivity (plus sanity checking) +for head in `ls "$GIT_DIR/refs/heads"` ; do + id="`git-rev-parse $head`" + echo ":$head $id" +done > "$GIT_DIR/$PFX-$SFX_HEADS" + +# check diff with color: +# ( for i in `find . -type f | grep -v '\.git'` ; do diff -u $i $REPO/$i ; done | cdiff ) | less -r -- 2.11.4.GIT