git-remote-hg: add hgimport, an hg-fast-import equivalent
[git/dscho.git] / git_remote_helpers / hg / hgimport.py
blob36fee8e96b51b7bd9515653d38a28fcd408806bc
1 # Copyright (C) 2008 Canonical Ltd
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 """Processor of import commands.
19 This module provides core processing functionality including an abstract class
20 for basing real processors on. See the processors package for examples.
21 """
23 import os
24 import shutil
26 from mercurial import context
27 from mercurial.node import nullid, hex
29 from git_remote_helpers.util import die
30 from git_remote_helpers.fastimport import processor, parser
class commit(object):
    """Plain record describing one changeset about to be written.

    Attributes mirror the constructor arguments:

    author  -- committer string; falls back to 'unknown' when empty/None
    date    -- date string; falls back to '0 0' when empty/None
    desc    -- commit message
    parents -- list of parent ids
    branch  -- branch name, or None
    rev     -- id of this commit in the import stream, or None
    extra   -- dict of extra metadata; a fresh dict per instance when omitted
    sortkey -- stored as given; not used by the code in this file
    """

    def __init__(self, author, date, desc, parents, branch=None, rev=None,
                 extra=None, sortkey=None):
        self.author = author or 'unknown'
        self.date = date or '0 0'
        self.desc = desc
        self.parents = parents
        self.branch = branch
        self.rev = rev
        # A literal {} default would be one dict shared by every instance
        # created without 'extra'; build a new one per instance instead.
        if extra is None:
            extra = {}
        self.extra = extra
        self.sortkey = sortkey
class HgImportProcessor(processor.ImportProcessor):
    """Feed the commands of a fast-import stream into a Mercurial repo.

    Blobs are spooled to files in a "blobs" directory below the
    repository's .hg directory; every 'commit' command is written with
    context.memctx and the value returned by putcommit() is remembered
    in self.idmap under the commit's id.
    """

    def __init__(self, ui, repo):
        super(HgImportProcessor, self).__init__()
        self.ui = ui
        self.repo = repo

        # when true, putcommit() records the branch name in the
        # changeset's 'extra' dict
        self.branchnames = True

        self.idmap = {}       # map commit ID (":1") to putcommit() result
        self.commitmap = {}   # map commit ID (":1") to commit object
        self.branchmap = {}   # map branch name to list of heads

        self.tags = []        # list of (tag, mark) tuples

        self.numblobs = 0     # for progress reporting
        self.blobdir = None   # created lazily by writeblob()

    def setup(self):
        """Setup before processing any streams."""
        pass

    def teardown(self):
        """Cleanup after processing all streams."""
        if self.blobdir and os.path.exists(self.blobdir):
            self.ui.status("Removing blob dir %r ...\n" % self.blobdir)
            shutil.rmtree(self.blobdir)

    def load_marksfile(self, name):
        """Replace self.idmap with the pairs stored in the file 'name'.

        Every non-blank line is stripped and split on single spaces;
        field 0 becomes the key and field 1 the value.  An IOError is
        reported through die().
        """
        try:
            # 'with' guarantees the handle is closed even if reading fails
            with open(name) as f:
                parsed = [line.strip().split(' ')
                          for line in f if line.strip()]
            self.idmap = dict((i[0], i[1]) for i in parsed)
        except IOError as e:
            die("load: %s", str(e))

    def write_marksfile(self, name):
        """Write self.idmap to the file 'name', one "key value" pair
        per line, sorted.  An IOError is reported through die().
        """
        try:
            with open(name, "w") as f:
                for pair in sorted(self.idmap.items()):
                    f.write("%s %s\n" % pair)
        except IOError as e:
            die("write: %s", str(e))

    def progress_handler(self, cmd):
        """Echo a 'progress' command's message through the ui."""
        self.ui.write("Progress: %s\n" % cmd.message)

    def blob_handler(self, cmd):
        """Spool the data of a 'blob' command to disk under its id."""
        self.writeblob(cmd.id, cmd.data)

    def _getblobfilename(self, blobid):
        """Return the path of the spool file for 'blobid'.

        Raises RuntimeError if no blob directory has been created yet.
        """
        if self.blobdir is None:
            raise RuntimeError("no blobs seen, so no blob directory created")
        # XXX should escape ":" for windows
        return os.path.join(self.blobdir, "blob-" + blobid)

    def getblob(self, fileid):
        """Return the spooled contents for 'fileid', a
        (commitid, blobid) tuple.  Only the blobid is used.
        """
        (commitid, blobid) = fileid
        with open(self._getblobfilename(blobid), "rb") as f:
            return f.read()

    def writeblob(self, blobid, data):
        """Write 'data' to the spool file for 'blobid', creating the
        blob directory on first use, and report progress every 500 blobs.
        """
        if self.blobdir is None:        # no blobs seen yet
            self.blobdir = os.path.join(self.repo.root, ".hg", "blobs")
            if not os.path.exists(self.blobdir):
                os.mkdir(self.blobdir)

        fn = self._getblobfilename(blobid)
        # 'with' closes the file even when the write itself fails
        with open(fn, "wb") as blobfile:
            blobfile.write(data)

        self.numblobs += 1
        if self.numblobs % 500 == 0:
            self.ui.status("%d blobs read\n" % self.numblobs)

    def getmode(self, name, fileid):
        """Return the mode recorded for file 'name' in the commit that
        'fileid' (a (commitid, blobid) tuple) belongs to.
        """
        # NOTE(review): nothing visible in this class ever assigns
        # self.filemodes, so this looks like it raises AttributeError
        # unless a base class provides it -- confirm.
        (commitid, blobid) = fileid
        return self.filemodes[commitid][name]

    def checkpoint_handler(self, cmd):
        # This command means nothing to us
        pass

    def _getcommit(self, committish):
        """Given a mark reference or a branch name, return the
        appropriate commit object.  Return None if committish is a
        branch with no commits.  Raises KeyError if anything else is out
        of whack, and ValueError if committish is neither a mark nor a
        head ref.
        """
        if committish.startswith(":"):
            # KeyError here indicates the input stream is broken.
            return self.commitmap[committish]

        branch = self._getbranch(committish)
        if branch is None:
            raise ValueError("invalid committish: %r" % committish)

        heads = self.branchmap.get(branch)
        if heads is None:
            return None
        # KeyError here indicates bad commit id in self.branchmap.
        return self.commitmap[heads[-1]]

    def _getbranch(self, ref):
        """Translate a Git head ref to corresponding Mercurial branch
        name.  E.g. "refs/heads/foo" is translated to "foo".
        Special case: "refs/heads/master" becomes "default".  If
        'ref' is not a head ref, return None.
        """
        prefix = "refs/heads/"
        if not ref.startswith(prefix):
            return None
        branch = ref[len(prefix):]
        if branch == "master":
            return "default"
        return branch

    def commit_handler(self, cmd):
        """Process one 'commit' command.

        Works out the parents and the Mercurial branch, collects the
        file changes through HgImportCommitHandler, updates
        self.commitmap and self.branchmap, writes the changeset with
        putcommit() and stores its result in self.idmap[cmd.id].

        Raises NotImplementedError when the command lists more than one
        'merge' parent.
        """
        # XXX this assumes the fixup branch name used by cvs2git.  In
        # contrast, git-fast-import(1) recommends "TAG_FIXUP" (not under
        # refs/heads), and implies that it can be called whatever the
        # creator of the fastimport dump wants to call it.  So the name
        # of the fixup branch should be configurable!
        fixup = (cmd.ref == "refs/heads/TAG.FIXUP")

        if cmd.from_:
            first_parent = cmd.from_
        else:
            first_parent = self._getcommit(cmd.ref)     # commit object
            if first_parent is not None:
                first_parent = first_parent.rev         # commit id

        if cmd.merges:
            if len(cmd.merges) > 1:
                raise NotImplementedError("Can't handle more than two parents")
            second_parent = cmd.merges[0]
        else:
            second_parent = None

        if first_parent is None and second_parent is not None:
            # First commit on a new branch that has 'merge' but no 'from':
            # special case meaning branch starts with no files; the contents of
            # the first commit (this one) determine the list of files at branch
            # time.
            # XXX the "starts with no files" part is not implemented; the
            # merge parent is simply promoted to first parent.
            first_parent = second_parent
            second_parent = None

        self.ui.debug("commit %s: first_parent = %r, second_parent = %r\n"
                      % (cmd, first_parent, second_parent))
        assert ((first_parent != second_parent) or
                (first_parent is second_parent is None)), \
               ("commit %s: first_parent == second parent = %r"
                % (cmd, first_parent))

        # Figure out the Mercurial branch name.
        if fixup and first_parent is not None:
            # If this is a fixup commit, pretend it happened on the same
            # branch as its first parent.  (We don't want a Mercurial
            # named branch called "TAG.FIXUP" in the output repository.)
            branch = self.commitmap[first_parent].branch
        else:
            branch = self._getbranch(cmd.ref)

        commit_handler = HgImportCommitHandler(
            self, cmd, self.ui)
        commit_handler.process()
        modified = dict(commit_handler.modified)
        modes = commit_handler.mode
        copies = commit_handler.copies

        # in case we are converting from git or bzr, prefer author but
        # fallback to committer (committer is required, author is
        # optional)
        userinfo = cmd.author or cmd.committer
        if userinfo[0] == userinfo[1]:
            # In order to conform to fastimport syntax, cvs2git with no
            # authormap produces author names like "jsmith <jsmith>"; if
            # we see that, revert to plain old "jsmith".
            user = userinfo[0]
        else:
            user = "%s <%s>" % (userinfo[0], userinfo[1])

        assert type(cmd.message) is unicode
        text = cmd.message.encode("utf-8")
        date = self.convert_date(userinfo)

        # Translate the parent marks to what putcommit() returned for them.
        parents = [self.idmap[i] for i in (first_parent, second_parent) if i]
        cmt = commit(user, date, text, parents, branch, rev=cmd.id)

        self.commitmap[cmd.id] = cmt
        heads = self.branchmap.get(branch)
        if heads is None:
            heads = [cmd.id]
        else:
            # adding to an existing branch: replace the previous head
            try:
                heads.remove(first_parent)
            except ValueError:          # first parent not a head: no problem
                pass
            heads.append(cmd.id)        # at end means this is tipmost
        self.branchmap[branch] = heads
        self.ui.debug("processed commit %s\n" % cmd)

        self.idmap[cmd.id] = self.putcommit(modified, modes, copies, cmt)

    def putcommit(self, files, modes, copies, commit):
        """Write 'commit' to the repository and return the hex id of
        the resulting tip.

        files  -- dict mapping file name to fileid, or to None for a
                  deleted file
        modes  -- dict mapping file name to '', 'x' or 'l'
        copies -- dict consulted with the name of each committed file
        commit -- commit object describing the changeset
        """

        def getfilectx(repo, memctx, name):
            fileid = files[name]
            if fileid is None:  # deleted file
                raise IOError
            data = self.getblob(fileid)
            # Look at the flags of *this* file; testing 'l' in modes would
            # ask whether some file is literally named "l".
            flags = modes.get(name, '')
            return context.memfilectx(name, data, 'l' in flags,
                                      'x' in flags, copies.get(name))

        # Drop duplicate parents but keep first/second parent order
        # (a set would hand them back in arbitrary order).
        parents = []
        for p in commit.parents:
            if p not in parents:
                parents.append(p)

        while len(parents) < 2:
            parents.append(nullid)
        p2 = parents.pop(0)

        text = commit.desc
        extra = commit.extra.copy()
        if self.branchnames and commit.branch:
            extra['branch'] = commit.branch

        # One changeset per remaining parent: each new tip becomes the
        # first parent of the next round.
        while parents:
            p1 = p2
            p2 = parents.pop(0)
            ctx = context.memctx(self.repo, (p1, p2), text, files.keys(),
                                 getfilectx, commit.author, commit.date, extra)
            self.repo.commitctx(ctx)
            text = "(octopus merge fixup)\n"
            p2 = hex(self.repo.changelog.tip())

        return p2

    def convert_date(self, c):
        """Return fields 2 and 3 of the author/committer tuple 'c',
        each converted to int, as the string "<c[2]> <c[3]>".
        """
        return "%d %d" % (int(c[2]), int(c[3]))

    def reset_handler(self, cmd):
        """Process a 'reset' command: move or forget a branch head, or
        queue a lightweight tag in self.tags.  Other refs are ignored.
        """
        tagprefix = "refs/tags/"
        branch = self._getbranch(cmd.ref)
        if branch:
            # The usual case for 'reset': (re)create the named branch.
            # XXX what should we do if cmd.from_ is None?
            if cmd.from_ is not None:
                self.branchmap[branch] = [cmd.from_]
            else:
                # pretend the branch never existed... is this right?!?
                self.branchmap.pop(branch, None)
        elif cmd.ref.startswith(tagprefix):
            # Create a "lightweight tag" in Git terms.  As I understand
            # it, that's a tag with no description and no history --
            # rather like CVS tags.  cvs2git turns CVS tags into Git
            # lightweight tags, so we should make sure they become
            # Mercurial tags.  But we don't have to fake a history for
            # them; save them up for the end.
            tag = cmd.ref[len(tagprefix):]
            self.tags.append((tag, cmd.from_))

    def tag_handler(self, cmd):
        """Ignore 'tag' commands."""
        pass

    def feature_handler(self, cmd):
        """Accept the 'done' feature; raise NotImplementedError for
        any other feature.
        """
        if cmd.feature_name == 'done':
            return
        raise NotImplementedError("unsupported feature: %r"
                                  % (cmd.feature_name,))
class HgImportCommitHandler(processor.CommitHandler):
    """Collect the file changes listed inside one 'commit' command.

    The results are left in the 'modified', 'mode' and 'copies'
    attributes for HgImportProcessor.commit_handler() to pick up.
    """

    def __init__(self, parent, command, ui):
        # NOTE(review): the base class __init__ is not called here --
        # confirm it has nothing else to set up.
        self.parent = parent            # HgImportProcessor running the show
        self.command = command          # CommitCommand that we're processing
        self.ui = ui

        # Files changed by this commit as a list of (filename, id)
        # tuples where id is (commitid, blobid).  The blobid is
        # needed to fetch the file's contents later, and the commitid
        # is needed to fetch the mode.  Deleted files are listed with
        # None in place of the id.
        self.modified = []

        # mode of files listed in self.modified: '', 'x', or 'l'
        self.mode = {}

        # dictionary of src: dest (renamed files are in here and self.modified)
        # NOTE(review): HgImportProcessor.putcommit() looks this dict up
        # by the name of the file being committed -- confirm that the
        # src: dest direction is what it expects.
        self.copies = {}

        # number of inline files seen in this commit
        self.inlinecount = 0

    def modify_handler(self, filecmd):
        """Record a added/modified file and its mode.

        Inline file data is first spooled through the parent's
        writeblob() under a synthetic "<commit id>-inline:<n>" blob id.
        Raises RuntimeError for a mode other than *644, *755 or 120000.
        """
        if filecmd.dataref:
            blobid = filecmd.dataref    # blobid is the mark of the blob
        else:
            blobid = "%s-inline:%d" % (self.command.id, self.inlinecount)
            assert filecmd.data is not None
            self.parent.writeblob(blobid, filecmd.data)
            self.inlinecount += 1

        fileid = (self.command.id, blobid)

        self.modified.append((filecmd.path, fileid))
        if filecmd.mode.endswith("644"):        # normal file
            mode = ''
        elif filecmd.mode.endswith("755"):      # executable
            mode = 'x'
        elif filecmd.mode == "120000":          # symlink
            mode = 'l'
        else:
            raise RuntimeError("mode %r unsupported" % filecmd.mode)

        self.mode[filecmd.path] = mode

    def delete_handler(self, filecmd):
        """Record a deleted file as (path, None) in self.modified."""
        self.modified.append((filecmd.path, None))

    def copy_handler(self, filecmd):
        """Record a copy from src_path to dest_path in self.copies."""
        self.copies[filecmd.src_path] = filecmd.dest_path

    def rename_handler(self, filecmd):
        """Record a rename: a copy to the new name plus a deletion of
        the old name.
        """
        # copy oldname to newname and delete oldname
        self.copies[filecmd.oldname] = filecmd.newname
        # Deletions are tracked in self.modified (see delete_handler);
        # the file being removed is the rename's old name.
        self.modified.append((filecmd.oldname, None))