hg-fast-export: add support for --flatten option
[fast-export/rorcz.git] / hg2git.py
blobb875e1bd03a843351d9ea23f702aa21276cab048
1 #!/usr/bin/env python
3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial import hg,ui
7 import re
8 import os
9 import sys
# default git branch name
cfg_master = 'master'

# default origin name; an empty string means branch names get no prefix
origin_name = ''

# default email address when unknown
unknown_addr = 'devnull@localhost'

# Splits "name <email> trailing" style input.  Groups, in order: the text
# before the first '<' (a literal "<at>" does not count as an opening '<'),
# the text after '<' when no '>' follows, or else the text between '<' and
# the last '>' plus the text after that '>'.
split_name_re = re.compile(
    r'^((?:[^<]|<at>)*?)(?:<(?!at>)(?:(?:([^>]*)|(?:(.*)>(.*)))))?$',
    re.DOTALL | re.IGNORECASE)

# Characters stripped from both ends of a name or email: every control
# character below 0x20, followed by space and . , : ; < > " \ '
git_crud = ''.join(chr(code) for code in range(0x20)) + ' .,:;<>"\\' + "'"

# Characters deleted wherever they appear in a name or email.
git_delch_re = re.compile(r'[<>\n]+', re.DOTALL)

# Captures everything before the first '@' or stand-alone "<at>".
spelled_at_re = re.compile(
    r'^(.*?)(?:@|(?:(?:^|\s)<at>(?:\s|$)))',
    re.IGNORECASE)
def set_default_branch(name):
    """Replace the module-wide default git branch name (cfg_master) with *name*."""
    global cfg_master
    cfg_master = name
def set_origin_name(name):
    """Replace the module-wide origin name (origin_name) with *name*."""
    global origin_name
    origin_name = name
def setup_repo(url):
    """Open the Mercurial repository at *url* with a non-interactive ui.

    Returns a ``(ui, repository)`` tuple.
    """
    try:
        repo_ui = ui.ui(interactive=False)
    except TypeError:
        # This ui.ui() does not take the keyword argument (presumably a
        # different Mercurial version), so switch interactivity off through
        # the configuration instead.
        repo_ui = ui.ui()
        repo_ui.setconfig('ui', 'interactive', 'off')
    repository = hg.repository(repo_ui, url)
    return repo_ui, repository
# Git strips "crud" characters off both ends of the user's name and of the
# user's email, then deletes any remaining '<', '>' and '\n' characters before
# joining them as "name <email>".  This function performs that stripping and
# deletion; it is applied to the name and to the email separately.
def gitname(name):
    """Return *name* with leading/trailing crud stripped and '<', '>', newlines removed."""
    trimmed = name.strip(git_crud)
    cleaned = git_delch_re.sub('', trimmed)
    return cleaned
def set_unknown_addr(addr):
    """Set the fallback email address used when a user has none.

    *addr* is cleaned with gitname() first.  Returns True when the fallback
    was replaced, False when *addr* is None or empty after cleaning (the
    previous fallback is kept in that case).
    """
    global unknown_addr
    if addr is None:
        return False
    cleaned = gitname(addr)
    if cleaned == '':
        return False
    unknown_addr = cleaned
    return True
# Split the combined name and email input into a separate name and email and
# apply Git's rules to each part.  The idea is to use anything to the left of
# the first '<' and to the right of the last '>' as the name.  Anything between
# the first '<' and the last '>' is treated as the email.  If there is no '<'
# but the name contains '@' (which may be spelled out) then treat the entire
# thing as an email with no name.  If the detected name is empty then anything
# up to the first '@' (which may be spelled out) in the email is used for the
# name.  Failing that the entire email is used for the name.
def split_name_email(combined):
    """Split *combined* into a two-element list ``[name, email]``.

    Both elements are cleaned with gitname().  Either element may come back
    as the empty string; in particular the email is reset to '' by the
    compatibility test at the end of the function.
    """
    # NOTE(review): the nesting below was reconstructed from a copy of the
    # file that had lost its indentation.  The fallback blocks are placed
    # inside `if match:` because they read `left`, which is only bound
    # there -- confirm against the canonical file.
    name = ''
    email,rawemail = '',''
    match = split_name_re.match(combined)
    if match:
        left,rest,mid,right = match.groups()
        if rest != None:
            # A '<' was found with no '>' after it: everything after the '<'
            # is the email.
            name = gitname(left)
            rawemail = rest
            email = gitname(rawemail)
        elif mid != None:
            # '<' ... '>' found: the text around the brackets forms the name.
            name = gitname(left.rstrip() + ' ' + right.lstrip())
            rawemail = mid
            email = gitname(rawemail)
        else:
            # No '<' at all.
            name = gitname(left)
        if email == '' and spelled_at_re.match(left):
            # No email was found but the name part contains an at-sign:
            # treat it as the email and leave the name to be derived below.
            rawemail = left
            email = name
            name = ''
        if name == '':
            # Derive a name from the part of the email before the at-sign,
            # falling back to the whole email.
            at = spelled_at_re.match(rawemail)
            if at:
                name = gitname(at.group(1))
            if name == '':
                name = email
            # We do this test to be compatible with the previous behavior of
            # hg2git.py.  When it's given a <email> without any name and the
            # email does not contain '@' then it sets the email to the
            # unknown address.
            if (len(left) < 2 or left[-1] != ' ') and not at:
                email = ''
    return [name, email]
def fixup_user(user,authors):
    """Turn a Mercurial user string into a git ``Name <email>`` identity.

    Surrounding double quotes are stripped from *user*.  When *authors* is
    not None it is consulted as a mapping from the stripped string to a
    replacement, defaulting to the string itself.  The result is split with
    split_name_email() and reassembled.
    """
    user = user.strip("\"")
    if authors is not None:
        # if we have an authors table, look the user up in it and keep the
        # current value when there is no entry
        user = authors.get(user, user)
    name, mail = split_name_email(user)
    # Git does not like an empty name -- split_name_email can only return an
    # empty name if it also returns an empty email.  This probably will never
    # happen since the input would have to be empty or only "crud"
    # characters, but substitute '-' just to be safe.
    final_name = name or '-'
    # Without an email address fall back to unknown_addr.
    final_mail = mail or unknown_addr
    return '%s <%s>' % (final_name, final_mail)
def get_branch(name):
    """Map a Mercurial branch name to the git branch name to export to.

    'HEAD', 'default' and the empty string become cfg_master; when
    origin_name is set the result is prefixed with ``origin_name + '/'``.
    """
    # 'HEAD' is the result of a bug in mutt's cvs->hg conversion,
    # other CVS imports may need it, too
    if name in ('HEAD', 'default', ''):
        name = cfg_master
    if not origin_name:
        return name
    return origin_name + '/' + name
def get_changeset(ui,repo,revision,authors=None):
    """Read one changeset from *repo* and return its fields ready for export.

    ui       -- unused here; kept for interface compatibility.
    repo     -- repository object providing lookup() and changelog.read().
    revision -- revision identifier passed to repo.lookup().
    authors  -- optional mapping handed to fixup_user(); None means no
                author mapping.

    Returns ``(node, manifest, user, (time, tz), files, desc, branch, extra)``
    where *user* has been passed through fixup_user(), *branch* through
    get_branch(), and *tz* is a ``+HHMM`` / ``-HHMM`` string.
    """
    node = repo.lookup(revision)
    (manifest,user,(time,timezone),files,desc,extra) = repo.changelog.read(node)
    # The stored offset is negated before formatting.  Sign and magnitude are
    # formatted separately: dividing the signed value floors toward negative
    # infinity, which turned e.g. -3h30 into "-0430".
    offset = -timezone
    sign = '+' if offset >= 0 else '-'
    hours, remainder = divmod(abs(offset), 3600)
    tz = '%s%02d%02d' % (sign, hours, remainder // 60)
    branch = get_branch(extra.get('branch','master'))
    return (node,manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra)
def mangle_key(key):
    """Return *key* unchanged; the default key transform of load_cache()."""
    return key


def load_cache(filename,get_key=mangle_key):
    """Load a ``:key value`` cache file into a dict.

    filename -- path of the cache file; a missing file yields an empty dict.
    get_key  -- callable applied to each key (without its leading ':')
                before it is stored.

    Every line must consist of exactly two space-separated fields, the first
    starting with ':'.  Other lines are reported on stderr with their line
    number and skipped.  Values have their trailing newline removed.
    """
    cache = {}
    if not os.path.exists(filename):
        return cache
    with open(filename, 'r') as f:
        for lineno, line in enumerate(f, 1):
            fields = line.split(' ')
            # startswith() instead of fields[0][0]: a line beginning with a
            # space produces an empty first field, which must be rejected
            # rather than raise IndexError.
            if len(fields) != 2 or not fields[0].startswith(':'):
                sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,lineno))
                continue
            # put key:value in cache, key without the leading ':'
            cache[get_key(fields[0][1:])] = fields[1].split('\n')[0]
    return cache
def save_cache(filename,cache):
    """Write *cache* to *filename*, one ``:key value`` line per entry.

    Keys and values are converted with str().  Any existing file is
    truncated.
    """
    # An explicit loop is required: map() used only for its side effects is
    # lazy on Python 3 and would write nothing.
    with open(filename, 'w+') as f:
        for key, value in cache.items():
            f.write(':%s %s\n' % (str(key), str(value)))
def get_git_sha1(name,type='heads'):
    """Return the first 40 characters of git's id for ``refs/<type>/<name>``.

    name -- ref name below ``refs/<type>/``.
    type -- ref namespace, 'heads' by default.

    Returns None when git prints nothing (for example because the ref does
    not exist) or when git cannot be run.
    """
    # Imported here so the block does not depend on a module-level import.
    import subprocess
    ref = 'refs/%s/%s' % (type, name)
    try:
        devnull = open(os.devnull, 'w')
        try:
            # use git-rev-parse to support packed refs; passing an argument
            # list keeps shell metacharacters in the ref name from being
            # interpreted as commands
            proc = subprocess.Popen(['git', 'rev-parse', '--verify', ref],
                                    stdout=subprocess.PIPE, stderr=devnull,
                                    universal_newlines=True)
            output = proc.communicate()[0]
        finally:
            devnull.close()
    except (IOError, OSError):
        return None
    first_line = output.split('\n', 1)[0]
    if not first_line:
        return None
    return first_line[0:40]