1 # Copyright (C) 2008 Canonical Ltd
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 """Processor of import commands.
19 This module provides core processing functionality including an abstract class
20 for basing real processors on. See the processors package for examples.
26 from mercurial
import context
27 from mercurial
.node
import nullid
, hex
29 from git_remote_helpers
.util
import die
30 from git_remote_helpers
.fastimport
import processor
, parser
34 def __init__(self
, author
, date
, desc
, parents
, branch
=None, rev
=None,
35 extra
={}, sortkey
=None):
36 self
.author
= author
or 'unknown'
37 self
.date
= date
or '0 0'
39 self
.parents
= parents
43 self
.sortkey
= sortkey
46 class HgImportProcessor(processor
.ImportProcessor
):
48 def __init__(self
, ui
, repo
):
49 super(HgImportProcessor
, self
).__init
__()
53 self
.branchnames
= True
56 self
.commitmap
= {} # map commit ID (":1") to commit object
57 self
.branchmap
= {} # map branch name to list of heads
59 self
.tags
= [] # list of (tag, mark) tuples
61 self
.numblobs
= 0 # for progress reporting
65 """Setup before processing any streams."""
69 """Cleanup after processing all streams."""
70 if self
.blobdir
and os
.path
.exists(self
.blobdir
):
71 self
.ui
.status("Removing blob dir %r ...\n" % self
.blobdir
)
72 shutil
.rmtree(self
.blobdir
)
74 def load_marksfile(self
, name
):
79 parsed
= [i
.strip().split(' ') for i
in lines
]
80 self
.idmap
= dict((i
[0], i
[1]) for i
in parsed
)
82 die("load: %s", str(e
))
84 def write_marksfile(self
, name
):
87 for pair
in sorted(self
.idmap
.iteritems()):
88 f
.write("%s %s\n" % pair
)
91 die("write: %s", str(e
))
def progress_handler(self, cmd):
    """Echo the message of a 'progress' command to the user interface.

    The text written is the command's message prefixed with
    "Progress: " and terminated by a newline.
    """
    report = "Progress: %s\n" % cmd.message
    self.ui.write(report)
def blob_handler(self, cmd):
    """Hand the contents of a 'blob' command to writeblob().

    The blob is stored under the command's id, with the command's data
    as its contents.
    """
    blobid, contents = cmd.id, cmd.data
    self.writeblob(blobid, contents)
def _getblobfilename(self, blobid):
    """Return the path of the file that holds blob 'blobid'.

    The file lives directly inside self.blobdir and is named "blob-"
    followed by the blob id.  Raises RuntimeError if self.blobdir is
    still None.
    """
    directory = self.blobdir
    if directory is not None:
        # XXX should escape ":" for windows
        return os.path.join(directory, "blob-" + blobid)
    raise RuntimeError("no blobs seen, so no blob directory created")
105 def getblob(self
, fileid
):
106 (commitid
, blobid
) = fileid
107 f
= open(self
._getblobfilename
(blobid
), "rb")
113 def writeblob(self
, blobid
, data
):
114 if self
.blobdir
is None: # no blobs seen yet
115 self
.blobdir
= os
.path
.join(self
.repo
.root
, ".hg", "blobs")
116 if not os
.path
.exists(self
.blobdir
):
117 os
.mkdir(self
.blobdir
)
119 fn
= self
._getblobfilename
(blobid
)
120 blobfile
= open(fn
, "wb")
121 #self.ui.debug("writing blob %s to %s (%d bytes)\n"
122 # % (blobid, fn, len(data)))
127 if self
.numblobs
% 500 == 0:
128 self
.ui
.status("%d blobs read\n" % self
.numblobs
)
def getmode(self, name, fileid):
    """Return the mode recorded for file 'name' in a given commit.

    'fileid' is a (commitid, blobid) pair; only the commit id is used,
    to select the per-commit table in self.filemodes.  Raises KeyError
    if either the commit id or the file name is not recorded.
    """
    commit_key, _unused_blob = fileid
    modes_of_commit = self.filemodes[commit_key]
    return modes_of_commit[name]
134 def checkpoint_handler(self
, cmd
):
135 # This command means nothing to us
138 def _getcommit(self
, committish
):
139 """Given a mark reference or a branch name, return the
140 appropriate commit object. Return None if committish is a
141 branch with no commits. Raises KeyError if anything else is out
144 if committish
.startswith(":"):
145 # KeyError here indicates the input stream is broken.
146 return self
.commitmap
[committish
]
148 branch
= self
._getbranch
(committish
)
150 raise ValueError("invalid committish: %r" % committish
)
152 heads
= self
.branchmap
.get(branch
)
156 # KeyError here indicates bad commit id in self.branchmap.
157 return self
.commitmap
[heads
[-1]]
159 def _getbranch(self
, ref
):
160 """Translate a Git head ref to corresponding Mercurial branch
161 name. E.g. \"refs/heads/foo\" is translated to \"foo\".
162 Special case: \"refs/heads/master\" becomes \"default\". If
163 'ref' is not a head ref, return None.
165 prefix
= "refs/heads/"
166 if ref
.startswith(prefix
):
167 branch
= ref
[len(prefix
):]
168 if branch
== "master":
175 def commit_handler(self
, cmd
):
176 # XXX this assumes the fixup branch name used by cvs2git. In
177 # contrast, git-fast-import(1) recommends "TAG_FIXUP" (not under
178 # refs/heads), and implies that it can be called whatever the
179 # creator of the fastimport dump wants to call it. So the name
180 # of the fixup branch should be configurable!
181 fixup
= (cmd
.ref
== "refs/heads/TAG.FIXUP")
184 first_parent
= cmd
.from_
186 first_parent
= self
._getcommit
(cmd
.ref
) # commit object
187 if first_parent
is not None:
188 first_parent
= first_parent
.rev
# commit id
191 if len(cmd
.merges
) > 1:
192 raise NotImplementedError("Can't handle more than two parents")
193 second_parent
= cmd
.merges
[0]
197 if first_parent
is None and second_parent
is not None:
198 # First commit on a new branch that has 'merge' but no 'from':
199 # special case meaning branch starts with no files; the contents of
200 # the first commit (this one) determine the list of files at branch
202 first_parent
= second_parent
204 no_files
= True # XXX this is ignored...
206 self
.ui
.debug("commit %s: first_parent = %r, second_parent = %r\n"
207 % (cmd
, first_parent
, second_parent
))
208 assert ((first_parent
!= second_parent
) or
209 (first_parent
is second_parent
is None)), \
210 ("commit %s: first_parent == second parent = %r"
211 % (cmd
, first_parent
))
213 # Figure out the Mercurial branch name.
214 if fixup
and first_parent
is not None:
215 # If this is a fixup commit, pretend it happened on the same
216 # branch as its first parent. (We don't want a Mercurial
217 # named branch called "TAG.FIXUP" in the output repository.)
218 branch
= self
.commitmap
[first_parent
].branch
220 branch
= self
._getbranch
(cmd
.ref
)
222 commit_handler
= HgImportCommitHandler(
224 commit_handler
.process()
225 modified
= dict(commit_handler
.modified
)
226 modes
= commit_handler
.mode
227 copies
= commit_handler
.copies
229 # in case we are converting from git or bzr, prefer author but
230 # fallback to committer (committer is required, author is
232 userinfo
= cmd
.author
or cmd
.committer
233 if userinfo
[0] == userinfo
[1]:
234 # In order to conform to fastimport syntax, cvs2git with no
235 # authormap produces author names like "jsmith <jsmith>"; if
236 # we see that, revert to plain old "jsmith".
239 user
= "%s <%s>" % (userinfo
[0], userinfo
[1])
241 assert type(cmd
.message
) is unicode
242 text
= cmd
.message
.encode("utf-8")
243 date
= self
.convert_date(userinfo
)
245 parents
= [self
.idmap
[i
] for i
in first_parent
, second_parent
if i
]
246 cmt
= commit(user
, date
, text
, parents
, branch
, rev
=cmd
.id)
248 self
.commitmap
[cmd
.id] = cmt
249 heads
= self
.branchmap
.get(branch
)
253 # adding to an existing branch: replace the previous head
255 heads
.remove(first_parent
)
256 except ValueError: # first parent not a head: no problem
258 heads
.append(cmd
.id) # at end means this is tipmost
259 self
.branchmap
[branch
] = heads
260 self
.ui
.debug("processed commit %s\n" % cmd
)
262 self
.idmap
[cmd
.id] = self
.putcommit(modified
, modes
, copies
, cmt
)
264 def putcommit(self
, files
, modes
, copies
, commit
):
266 def getfilectx(repo
, memctx
, name
):
268 if fileid
is None: # deleted file
270 data
= self
.getblob(fileid
)
271 ctx
= context
.memfilectx(name
, data
, 'l' in modes
,
272 'x' in modes
, copies
.get(name
))
275 parents
= list(set(commit
.parents
))
276 nparents
= len(parents
)
279 parents
.append(nullid
)
281 parents
.append(nullid
)
285 extra
= commit
.extra
.copy()
286 if self
.branchnames
and commit
.branch
:
287 extra
['branch'] = commit
.branch
292 ctx
= context
.memctx(self
.repo
, (p1
, p2
), text
, files
.keys(),
293 getfilectx
, commit
.author
, commit
.date
, extra
)
294 self
.repo
.commitctx(ctx
)
295 text
= "(octopus merge fixup)\n"
296 p2
= hex(self
.repo
.changelog
.tip())
300 def convert_date(self
, c
):
301 res
= (int(c
[2]), int(c
[3]))
303 #print type((0, 0)), type(res), len(res), type(res) is type((0, 0))
304 #if type(res) is type((0, 0)) and len(res) == 2:
309 def reset_handler(self
, cmd
):
310 tagprefix
= "refs/tags/"
311 branch
= self
._getbranch
(cmd
.ref
)
313 # The usual case for 'reset': (re)create the named branch.
314 # XXX what should we do if cmd.from_ is None?
315 if cmd
.from_
is not None:
316 self
.branchmap
[branch
] = [cmd
.from_
]
318 # pretend the branch never existed... is this right?!?
320 del self
.branchmap
[branch
]
324 # # XXX filename? line number?
325 # self.ui.warn("ignoring branch reset with no 'from'\n")
326 elif cmd
.ref
.startswith(tagprefix
):
327 # Create a "lightweight tag" in Git terms. As I understand
328 # it, that's a tag with no description and no history --
329 # rather like CVS tags. cvs2git turns CVS tags into Git
330 # lightweight tags, so we should make sure they become
331 # Mercurial tags. But we don't have to fake a history for
332 # them; save them up for the end.
333 tag
= cmd
.ref
[len(tagprefix
):]
334 self
.tags
.append((tag
, cmd
.from_
))
336 def tag_handler(self
, cmd
):
339 def feature_handler(self
, cmd
):
340 if cmd
.feature_name
== 'done':
342 raise NotImplementedError(self
.feature_handler
)
345 class HgImportCommitHandler(processor
.CommitHandler
):
347 def __init__(self
, parent
, command
, ui
):
348 self
.parent
= parent
# HgImportProcessor running the show
349 self
.command
= command
# CommitCommand that we're processing
352 # Files changed by this commit as a list of (filename, id)
353 # tuples where id is (commitid, blobid). The blobid is
354 # needed to fetch the file's contents later, and the commitid
355 # is needed to fetch the mode.
356 # (XXX what about inline file contents?)
357 # (XXX how to describe deleted files?)
360 # mode of files listed in self.modified: '', 'x', or 'l'
363 # dictionary of src: dest (renamed files are in here and self.modified)
366 # number of inline files seen in this commit
369 def modify_handler(self
, filecmd
):
371 blobid
= filecmd
.dataref
# blobid is the mark of the blob
373 blobid
= "%s-inline:%d" % (self
.command
.id, self
.inlinecount
)
374 assert filecmd
.data
is not None
375 self
.parent
.writeblob(blobid
, filecmd
.data
)
376 self
.inlinecount
+= 1
378 fileid
= (self
.command
.id, blobid
)
380 self
.modified
.append((filecmd
.path
, fileid
))
381 if filecmd
.mode
.endswith("644"): # normal file
383 elif filecmd
.mode
.endswith("755"): # executable
385 elif filecmd
.mode
== "120000": # symlink
388 raise RuntimeError("mode %r unsupported" % filecmd
.mode
)
390 self
.mode
[filecmd
.path
] = mode
def delete_handler(self, filecmd):
    """Record that this commit deletes the file at filecmd.path.

    A deletion is stored in self.modified as a (path, None) entry,
    i.e. a modified-file entry with no file id.
    """
    removal = (filecmd.path, None)
    self.modified.append(removal)
def copy_handler(self, filecmd):
    """Record a file copy performed by this commit.

    The copy is stored in self.copies, keyed by the command's source
    path with the destination path as the value.
    """
    # NOTE(review): putcommit() queries this mapping with
    # copies.get(name) for each committed file name -- confirm that
    # keying by the source path is what that lookup expects.
    origin, target = filecmd.src_path, filecmd.dest_path
    self.copies[origin] = target
def rename_handler(self, filecmd):
    """Record a file rename performed by this commit.

    A rename is treated as a copy of filecmd.oldname to filecmd.newname
    followed by a deletion of filecmd.oldname.
    """
    # Copy half: same "src: dest" layout that copy_handler stores.
    self.copies[filecmd.oldname] = filecmd.newname
    # Delete half: deletions are recorded in self.modified with a None
    # file id, the same way delete_handler records them.  The previous
    # code appended (filecmd.path, None) to self.files, a list no other
    # handler in this class writes to, so the deletion of the old name
    # was never recorded alongside the other changes.
    self.modified.append((filecmd.oldname, None))