cvs2git: Make the --blobfile argument optional.
[cvs2svn.git] / cvs2svn_lib / svn_dump.py
blob82f06a2b40222df3e4392a5372001b00bc934a4e
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2010 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module contains code to output to Subversion dumpfile format."""
20 import subprocess
22 try:
23 from hashlib import md5
24 except ImportError:
25 from md5 import new as md5
27 from cvs2svn_lib.common import CommandError
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import path_split
31 from cvs2svn_lib.context import Ctx
32 from cvs2svn_lib.cvs_path import CVSDirectory
33 from cvs2svn_lib.cvs_path import CVSFile
34 from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
# The operations that can be applied to a file node.  The values are
# written verbatim into the 'Node-action:' header of the dumpfile.
OP_ADD = 'add'
OP_CHANGE = 'change'
def utf8_path(path):
    """Return PATH re-encoded as UTF-8.

    PATH is decoded with the filename decoder configured on the run
    context and the result is encoded as UTF-8.  Raise FatalError if
    either step fails with a UnicodeError."""

    decoder = Ctx().cvs_filename_decoder
    try:
        decoded = decoder.decode_path(path)
        return decoded.encode('utf8')
    except UnicodeError:
        message = (
            "Unable to convert a path '%s' to internal encoding.\n"
            "Consider rerunning with one or more '--encoding' parameters or\n"
            "with '--fallback-encoding'."
        ) % (path,)
        raise FatalError(message)
def generate_ignores(cvsignore, raw_ignore_val):
    """Return the list of ignore patterns found in RAW_IGNORE_VAL.

    RAW_IGNORE_VAL is the text of a .cvsignore file; it is split on
    whitespace.  A lone '!' discards every pattern collected before it
    (see http://cvsbook.red-bean.com/cvsbook.html#cvsignore).  Every
    other entry is decoded with the configured filename decoder and
    re-encoded as UTF-8.

    CVSIGNORE is the path of the file the text came from; it is only
    used in the error message.  Raise FatalError if an entry cannot be
    converted."""

    patterns = []
    for entry in raw_ignore_val.split():
        if entry == '!':
            # Start over: '!' clears everything seen so far.
            patterns = []
            continue

        try:
            converted = (
                Ctx().cvs_filename_decoder.decode_path(entry).encode('utf8')
            )
        except UnicodeError:
            raise FatalError(
                "Unable to convert path '%s' (found in file %s) to internal encoding.\n"
                "Consider rerunning with one or more '--encoding' parameters or\n"
                "with '--fallback-encoding'."
                % (entry, cvsignore,))
        patterns.append(converted)

    return patterns
class DumpstreamDelegate(SVNRepositoryDelegate):
    """Emit repository changes as a Subversion dumpfile stream."""

    def __init__(self, revision_reader, dumpfile):
        """Return a new DumpstreamDelegate instance.

        REVISION_READER is the object whose get_content() method is
        used to obtain file contents.  DUMPFILE should be a file-like
        object opened in binary mode, to which the dump stream will be
        written.  The only methods called on the object are write()
        and close().  The dumpfile header is written immediately."""

        self._revision_reader = revision_reader
        self._dumpfile = dumpfile
        self._write_dumpfile_header()

        # The SVN paths of the basic project infrastructure directories
        # that have been created so far.  This covers all of the LOD
        # paths plus all of their parent directories.  The root
        # directory ('') is treated as existing from the start.
        self._basic_directories = set([''])

    def _write_dumpfile_header(self):
        """Write the standard header that opens the dumpfile.

        No UUID is specified: the CVS repository doesn't have one, and
        the Subversion repository will be created with one anyway."""

        self._dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    @staticmethod
    def _string_for_props(properties):
        """Return PROPERTIES serialized in the form the dumpfile needs.

        Properties are emitted sorted by name.  Names starting with '_'
        are for internal use only and are skipped; a value of None
        means that the property should be left unset, so it is skipped
        too.  The result always ends with the 'PROPS-END' line."""

        chunks = [
            'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)
            for (name, value) in sorted(properties.iteritems())
            if not name.startswith('_') and value is not None
        ]
        chunks.append('PROPS-END\n')
        return ''.join(chunks)

    def start_commit(self, revnum, revprops):
        """Emit the start of revision REVNUM with properties REVPROPS."""

        # The start of a new commit typically looks like this:
        #
        #     Revision-number: 1
        #     Prop-content-length: 129
        #     Content-length: 129
        #
        #     K 7
        #     svn:log
        #     V 27
        #     Log message for revision 1.
        #     K 10
        #     svn:author
        #     V 7
        #     jrandom
        #     K 8
        #     svn:date
        #     V 27
        #     2003-04-22T22:57:58.132837Z
        #     PROPS-END
        #
        # Notice that the length headers count everything -- not just
        # the length of the data but also the lengths of the lengths,
        # including the 'K ' or 'V ' prefixes.
        #
        # Prop-content-length covers just the props, whereas
        # Content-length covers everything; that is the generic header
        # form for any entity in a dumpfile.  Revisions only have
        # props, so for them the two lengths are always equal.

        serialized_props = self._string_for_props(revprops)
        props_len = len(serialized_props)

        # Revision header followed by the revprops, in a single write.
        revision_record = (
            'Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            '\n'
        ) % (revnum, props_len, props_len, serialized_props)
        self._dumpfile.write(revision_record)

    def end_commit(self):
        """Do nothing; a revision needs no trailer in the dumpfile."""

        pass

    def _make_any_dir(self, path):
        """Emit the creation of directory PATH."""

        node_record = (
            "Node-path: %s\n"
            "Node-kind: dir\n"
            "Node-action: add\n"
            "\n"
            "\n"
        ) % utf8_path(path)
        self._dumpfile.write(node_record)

    def _register_basic_directory(self, path, create):
        """Register the creation of PATH if it is not already there.

        Any parent directories that do not already exist are created
        first.  If CREATE is set, PATH itself is created as well;
        otherwise it is only recorded as present.  This method should
        only be used for the LOD paths and the directories containing
        them, not for directories within an LOD path."""

        if path in self._basic_directories:
            return

        # Make sure that the parent directory is present:
        parent_path = path_split(path)[0]
        self._register_basic_directory(parent_path, True)

        if create:
            self._make_any_dir(path)
        self._basic_directories.add(path)

    def initialize_project(self, project):
        """Create any initial directories for PROJECT.

        Each path returned by PROJECT.get_initial_directories() is
        registered and created, along with any missing parents.
        Directories that already exist (e.g., because two directories
        share part of their paths either within or across projects)
        are not created again."""

        for initial_dir in project.get_initial_directories():
            self._register_basic_directory(initial_dir, True)

    def initialize_lod(self, lod):
        """Create the directory for LOD, unless its path is empty."""

        lod_path = lod.get_path()
        if not lod_path:
            return
        self._register_basic_directory(lod_path, True)

    def mkdir(self, lod, cvs_directory):
        """Emit the creation of CVS_DIRECTORY within LOD."""

        svn_path = lod.get_path(cvs_directory.cvs_path)
        self._make_any_dir(svn_path)

    def _write_dir_props_change(self, dir_path, prop_contents):
        """Emit a property change for directory DIR_PATH.

        PROP_CONTENTS is the complete serialized property block,
        including the trailing 'PROPS-END' line.  A directory node has
        no text, so both length headers carry the same value."""

        props_len = len(prop_contents)

        # Headers first, then the props, in a single write.
        node_record = (
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: change\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
        ) % (utf8_path(dir_path), props_len, props_len, prop_contents)
        self._dumpfile.write(node_record)

    def _add_or_change_path(self, cvs_rev, op):
        """Emit the addition or change corresponding to CVS_REV.

        OP is either the constant OP_ADD or OP_CHANGE.  If the file is
        a '.cvsignore', its contents are first written as the
        'svn:ignore' property of the containing directory; the file
        itself is then only emitted if Ctx().keep_cvsignore is set."""

        assert op in (OP_ADD, OP_CHANGE)

        # The property handling here relies on an undocumented but
        # consistent feature of the Subversion dumpfile-loading code:
        # when a node's properties aren't mentioned at all (no
        # "Prop-content-length:" header, no properties listed, and no
        # "PROPS-END\n" line), the node's properties are left as they
        # were.
        #
        # Text content behaves the same way.  A change to *just* the
        # properties of a node that already has text from an earlier
        # revision carries neither "Text-content-length:" nor
        # "Text-content-md5:" and no text, yet the file's text is not
        # erased -- it simply remains unchanged.
        #
        # This suits cvs2svn well:
        #
        # For files, most properties are set in the first revision and
        # never changed (the only exception is 'cvs2svn:cvs-rev').  If
        # 'cvs2svn:cvs-rev' is not being used, there is no need to
        # remember the full property set of a file once it is written.
        #
        # For directories, the only property set is "svn:ignore".  It
        # may change after the first revision, but always based on the
        # contents of a ".cvsignore" file -- CVS does the remembering,
        # so the previous value need not be preserved here either.

        # Work out the (sorted-by-name) property block, if there is one.
        svn_props = cvs_rev.get_properties()
        prop_contents = ''
        props_header = ''
        if cvs_rev.properties_changed:
            prop_contents = self._string_for_props(svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)

        data = self._revision_reader.get_content(cvs_rev)

        # A .cvsignore file is treated as a property of its directory.
        dir_path, basename = path_split(cvs_rev.get_svn_path())
        if basename == '.cvsignore':
            ignore_lines = ''.join(
                (pattern + '\n')
                for pattern in generate_ignores(cvs_rev.get_svn_path(), data)
            )
            self._write_dir_props_change(
                dir_path,
                self._string_for_props({'svn:ignore': ignore_lines}),
            )
            if not Ctx().keep_cvsignore:
                return

        text_md5 = md5(data).hexdigest()

        # The content length is the length of the property data plus
        # the length of the text data.
        node_header = (
            'Node-path: %s\n'
            'Node-kind: file\n'
            'Node-action: %s\n'
            '%s'  # no property header if no props
            'Text-content-length: %d\n'
            'Text-content-md5: %s\n'
            'Content-length: %d\n'
            '\n'
        ) % (
            utf8_path(cvs_rev.get_svn_path()), op, props_header,
            len(data), text_md5, len(data) + len(prop_contents),
        )
        self._dumpfile.write(node_header)

        if prop_contents:
            self._dumpfile.write(prop_contents)

        self._dumpfile.write(data)

        # This record is done.  Write two newlines: one to terminate
        # contents that weren't themselves newline-terminated, and one
        # to provide a blank line for readability.
        self._dumpfile.write('\n\n')

    def add_path(self, cvs_rev):
        """Emit the addition corresponding to CVS_REV, a CVSRevisionAdd."""

        self._add_or_change_path(cvs_rev, OP_ADD)

    def change_path(self, cvs_rev):
        """Emit the change corresponding to CVS_REV, a CVSRevisionChange."""

        self._add_or_change_path(cvs_rev, OP_CHANGE)

    def delete_lod(self, lod):
        """Emit the deletion of LOD and forget its directory."""

        lod_record = (
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
        ) % (utf8_path(lod.get_path()),)
        self._dumpfile.write(lod_record)
        self._basic_directories.remove(lod.get_path())

    def delete_path(self, lod, cvs_path):
        """Emit the deletion of CVS_PATH from LOD.

        Deleting a '.cvsignore' file first clears the properties of
        the containing directory; the file node itself is then only
        deleted if Ctx().keep_cvsignore is set."""

        dir_path, basename = path_split(
            lod.get_path(cvs_path.get_cvs_path())
        )
        if basename == '.cvsignore':
            # When a .cvsignore file is deleted, the directory's
            # svn:ignore property needs to be deleted.  An empty
            # property block does that.
            self._write_dir_props_change(dir_path, 'PROPS-END\n')
            if not Ctx().keep_cvsignore:
                return

        node_record = (
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
        ) % (utf8_path(lod.get_path(cvs_path.cvs_path)),)
        self._dumpfile.write(node_record)

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Emit the copy of SRC_LOD at revision SRC_REVNUM to DEST_LOD."""

        # Register the main LOD directory, and create parent
        # directories as needed.  The LOD directory itself is not
        # created here, because the copy below brings it into being.
        self._register_basic_directory(dest_lod.get_path(), False)

        copy_record = (
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
        ) % (
            utf8_path(dest_lod.get_path()),
            src_revnum,
            utf8_path(src_lod.get_path()),
        )
        self._dumpfile.write(copy_record)

    def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
        """Emit the copy of CVS_PATH from SRC_LOD to DEST_LOD.

        The source is taken from revision SRC_REVNUM.  CVS_PATH must
        be a CVSFile or a CVSDirectory; anything else raises
        InternalError.  A '.cvsignore' file is skipped entirely unless
        Ctx().keep_cvsignore is set."""

        if isinstance(cvs_path, CVSFile):
            # FIXME: For a .cvsignore file we have to adjust the
            # containing directory's svn:ignore property to reflect
            # the addition of the .cvsignore file to the LOD!  This is
            # awkward because we don't have the contents of the
            # .cvsignore file available.
            if (cvs_path.rcs_basename == '.cvsignore'
                    and not Ctx().keep_cvsignore):
                return
            node_kind = 'file'
        elif isinstance(cvs_path, CVSDirectory):
            node_kind = 'dir'
        else:
            raise InternalError()

        copy_record = (
            'Node-path: %s\n'
            'Node-kind: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
        ) % (
            utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
            node_kind,
            src_revnum,
            utf8_path(src_lod.get_path(cvs_path.cvs_path)),
        )
        self._dumpfile.write(copy_record)

    def finish(self):
        """Close the dumpfile once all revisions have been committed."""

        self._dumpfile.close()
class LoaderPipe(object):
    """A file-like object whose output is fed to 'svnadmin load'.

    Only write() and close() are provided.  Failures of the child
    process are detected when writing and when closing, and are
    reported together with whatever svnadmin printed on stderr."""

    def __init__(self, target):
        """Start 'svnadmin load -q TARGET' with pipes on all streams."""

        command = [Ctx().svnadmin_executable, 'load', '-q', target]
        self.loader_pipe = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # The child's standard output is never read; close our end of
        # it straight away.
        self.loader_pipe.stdout.close()

    def write(self, s):
        """Send S to the standard input of svnadmin.

        Raise FatalError, quoting svnadmin's error output, if the
        write fails with an IOError."""

        child = self.loader_pipe
        try:
            child.stdin.write(s)
        except IOError:
            raise FatalError(
                'svnadmin failed with the following output while '
                'loading the dumpfile:\n%s'
                % (child.stderr.read(),)
            )

    def close(self):
        """Finish the load and wait for svnadmin to exit.

        Raise CommandError if svnadmin exits with a non-zero status."""

        child = self.loader_pipe
        child.stdin.close()
        # Collect stderr before waiting for the process to exit.
        error_output = child.stderr.read()
        exit_status = child.wait()
        del self.loader_pipe
        if not exit_status:
            return
        raise CommandError('svnadmin load', exit_status, error_output)