Document changes made since last edit of CHANGES.
[cvs2svn.git] / cvs2svn_lib / svn_dump.py
blob9a8fe9dd7d8d87173cfa1d4d82842b0ae5208f24
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2010 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains code to output to Subversion dumpfile format."""
20 import subprocess
22 try:
23 from hashlib import md5
24 except ImportError:
25 from md5 import new as md5
27 from cvs2svn_lib.common import CommandError
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import path_split
31 from cvs2svn_lib.context import Ctx
32 from cvs2svn_lib.cvs_path import CVSDirectory
33 from cvs2svn_lib.cvs_path import CVSFile
34 from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
# Things that can happen to a file.  These strings are written verbatim
# as the value of the "Node-action:" header of a file node record.
OP_ADD = 'add'
OP_CHANGE = 'change'
class DumpstreamDelegate(SVNRepositoryDelegate):
    """Write output in Subversion dumpfile format."""

    def __init__(self, revision_reader, dumpfile):
        """Return a new DumpstreamDelegate instance.

        REVISION_READER is the object whose get_content() method is
        called to obtain the text of each file revision that is written.

        DUMPFILE should be a file-like object opened in binary mode, to
        which the dump stream will be written.  The only methods called
        on the object are write() and close()."""

        self._revision_reader = revision_reader
        self._dumpfile = dumpfile
        self._write_dumpfile_header()

        # A set of the basic project infrastructure project directories
        # that have been created so far, as SVN paths.  (The root
        # directory is considered to be present at initialization.)  This
        # includes all of the LOD paths, and all of their parent
        # directories etc.
        self._basic_directories = set([''])

    def _write_dumpfile_header(self):
        """Initialize the dumpfile with the standard headers.

        Since the CVS repository doesn't have a UUID, and the Subversion
        repository will be created with one anyway, we don't specify a
        UUID in the dumpfile."""

        self._dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    def _utf8_path(self, path):
        """Return a copy of PATH encoded in UTF-8.

        Raise FatalError if a component of PATH cannot be converted."""

        # Convert each path component separately (as they may each use
        # different encodings).
        try:
            # The decoder does not change between components, so look it
            # up only once.
            decode = Ctx().cvs_filename_decoder
            return '/'.join([
                decode(piece).encode('utf8')
                for piece in path.split('/')
                ])
        except UnicodeError:
            raise FatalError(
                "Unable to convert a path '%s' to internal encoding.\n"
                "Consider rerunning with one or more '--encoding' parameters or\n"
                "with '--fallback-encoding'."
                % (path,))

    @staticmethod
    def _string_for_props(properties):
        """Return PROPERTIES in the form needed for the dumpfile.

        PROPERTIES is a mapping from property name to property value.
        The result lists the properties sorted by name, each as a
        'K <len>' / name / 'V <len>' / value group, and always ends with
        a PROPS-END line (even if no property was written)."""

        prop_strings = []
        for (k, v) in sorted(properties.items()):
            if k.startswith('_'):
                # Such properties are for internal use only.
                continue
            if v is None:
                # None indicates that the property should be left unset.
                continue
            prop_strings.append(
                'K %d\n%s\nV %d\n%s\n' % (len(k), k, len(v), v)
                )

        prop_strings.append('PROPS-END\n')

        return ''.join(prop_strings)

    def start_commit(self, revnum, revprops):
        """Emit the start of SVN_COMMIT (an SVNCommit).

        REVNUM is the number of the revision being started and REVPROPS
        is the mapping of revision properties to record for it."""

        # The start of a new commit typically looks like this:
        #
        #   Revision-number: 1
        #   Prop-content-length: 129
        #   Content-length: 129
        #
        #   K 7
        #   svn:log
        #   V 27
        #   Log message for revision 1.
        #   K 10
        #   svn:author
        #   V 7
        #   jrandom
        #   K 8
        #   svn:date
        #   V 27
        #   2003-04-22T22:57:58.132837Z
        #   PROPS-END
        #
        # Notice that the length headers count everything -- not just the
        # length of the data but also the lengths of the lengths, including
        # the 'K ' or 'V ' prefixes.
        #
        # The reason there are both Prop-content-length and Content-length
        # is that the former includes just props, while the latter includes
        # everything.  That's the generic header form for any entity in a
        # dumpfile.  But since revisions only have props, the two lengths
        # are always the same for revisions.

        # Calculate the output needed for the property definitions.
        all_prop_strings = self._string_for_props(revprops)
        total_len = len(all_prop_strings)

        # Print the revision header and revprops
        self._dumpfile.write(
            'Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            '\n'
            % (revnum, total_len, total_len, all_prop_strings)
            )

    def end_commit(self):
        """Do nothing; no output is written at the end of a commit."""

        pass

    def _make_any_dir(self, path):
        """Emit the creation of directory PATH."""

        self._dumpfile.write(
            "Node-path: %s\n"
            "Node-kind: dir\n"
            "Node-action: add\n"
            "\n"
            "\n"
            % self._utf8_path(path)
            )

    def _write_directory_props(self, dir_path, prop_contents):
        """Emit a change that sets the properties of directory DIR_PATH.

        PROP_CONTENTS is the already-formatted property block, as
        returned by _string_for_props() (i.e., terminated by a PROPS-END
        line).  A directory node has no text, so the same length is
        written for both Prop-content-length and Content-length."""

        prop_len = len(prop_contents)

        # write headers, then props
        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: change\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            % (self._utf8_path(dir_path),
               prop_len, prop_len, prop_contents)
            )

    def _register_basic_directory(self, path, create):
        """Register the creation of PATH if it is not already there.

        Create any parent directories that do not already exist.  If
        CREATE is set, also create PATH if it doesn't already exist.
        This method should only be used for the LOD paths and the
        directories containing them, not for directories within an LOD
        path."""

        if path not in self._basic_directories:
            # Make sure that the parent directory is present:
            self._register_basic_directory(path_split(path)[0], True)
            if create:
                self._make_any_dir(path)
            # PATH is recorded even when CREATE is false; in that case the
            # caller is responsible for emitting the node itself.
            self._basic_directories.add(path)

    def initialize_project(self, project):
        """Create any initial directories for the project.

        The trunk, tags, and branches directories are created the first
        time the project is seen.  Be sure not to create parent
        directories that already exist (e.g., because two directories
        share part of their paths either within or across projects)."""

        for path in project.get_initial_directories():
            self._register_basic_directory(path, True)

    def initialize_lod(self, lod):
        """Create the directory for LOD, and its parents, if needed.

        Nothing is done if the path of LOD is empty."""

        lod_path = lod.get_path()
        if lod_path:
            self._register_basic_directory(lod_path, True)

    def mkdir(self, lod, cvs_directory):
        """Emit the creation of CVS_DIRECTORY within LOD."""

        self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

    def _add_or_change_path(self, cvs_rev, op):
        """Emit the addition or change corresponding to CVS_REV.

        OP is either the constant OP_ADD or OP_CHANGE.

        If the file is named '.cvsignore', its contents are first
        written as the 'svn:ignore' property of the containing
        directory; the file itself is then written only if
        Ctx().keep_cvsignore is set."""

        assert op in [OP_ADD, OP_CHANGE]

        # The property handling here takes advantage of an undocumented
        # but IMHO consistent feature of the Subversion dumpfile-loading
        # code.  When a node's properties aren't mentioned (that is, the
        # "Prop-content-length:" header is absent, no properties are
        # listed at all, and there is no "PROPS-END\n" line) then no
        # change is made to the node's properties.
        #
        # This is consistent with the way dumpfiles behave w.r.t. text
        # content changes, so I'm comfortable relying on it.  If you
        # commit a change to *just* the properties of some node that
        # already has text contents from a previous revision, then in the
        # dumpfile output for the prop change, no "Text-content-length:"
        # nor "Text-content-md5:" header will be present, and the text of
        # the file will not be given.  But this does not cause the file's
        # text to be erased!  It simply remains unchanged.
        #
        # This works out great for cvs2svn, due to lucky coincidences:
        #
        # For files, we set most properties in the first revision and
        # never change them.  (The only exception is the 'cvs2svn:cvs-rev'
        # property.)  If 'cvs2svn:cvs-rev' is not being used, then there
        # is no need to remember the full set of properties on a given
        # file once we've set it.
        #
        # For directories, the only property we set is "svn:ignore", and
        # while we may change it after the first revision, we always do so
        # based on the contents of a ".cvsignore" file -- in other words,
        # CVS is doing the remembering for us, so we still don't have to
        # preserve the previous value of the property ourselves.

        # Calculate the (sorted-by-name) property string and length, if any.
        svn_props = cvs_rev.get_properties()
        if cvs_rev.properties_changed:
            prop_contents = self._string_for_props(svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)
        else:
            prop_contents = ''
            props_header = ''

        data = self._revision_reader.get_content(cvs_rev)

        svn_path = cvs_rev.get_svn_path()

        # treat .cvsignore as a directory property
        dir_path, basename = path_split(svn_path)
        if basename == '.cvsignore':
            ignore_contents = self._string_for_props({
                'svn:ignore': ''.join(
                    (s + '\n') for s in generate_ignores(data)
                    ),
                })
            self._write_directory_props(dir_path, ignore_contents)
            if not Ctx().keep_cvsignore:
                return

        checksum = md5()
        checksum.update(data)

        # The content length is the length of property data, text data,
        # and any metadata around/inside them:
        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: file\n'
            'Node-action: %s\n'
            '%s'  # no property header if no props
            'Text-content-length: %d\n'
            'Text-content-md5: %s\n'
            'Content-length: %d\n'
            '\n' % (
                self._utf8_path(svn_path), op, props_header,
                len(data), checksum.hexdigest(),
                len(data) + len(prop_contents),
                )
            )

        if prop_contents:
            self._dumpfile.write(prop_contents)

        self._dumpfile.write(data)

        # This record is done (write two newlines -- one to terminate
        # contents that weren't themselves newline-terminated, one to
        # provide a blank line for readability).
        self._dumpfile.write('\n\n')

    def add_path(self, cvs_rev):
        """Emit the addition corresponding to CVS_REV, a CVSRevisionAdd."""

        self._add_or_change_path(cvs_rev, OP_ADD)

    def change_path(self, cvs_rev):
        """Emit the change corresponding to CVS_REV, a CVSRevisionChange."""

        self._add_or_change_path(cvs_rev, OP_CHANGE)

    def delete_lod(self, lod):
        """Emit the deletion of LOD.

        The path of LOD is also removed from the set of registered basic
        directories; it must currently be a member of that set."""

        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path()),)
            )
        self._basic_directories.remove(lod.get_path())

    def delete_path(self, lod, cvs_path):
        """Emit the deletion of CVS_PATH from LOD.

        If CVS_PATH is a '.cvsignore' file, the properties of the
        containing directory are first replaced by an empty property
        list; the file itself is then deleted only if
        Ctx().keep_cvsignore is set."""

        # NOTE(review): the path is read via get_cvs_path() here but via
        # the cvs_path attribute below; presumably the two are
        # equivalent -- confirm against CVSPath.
        dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path()))
        if basename == '.cvsignore':
            # When a .cvsignore file is deleted, the directory's svn:ignore
            # property needs to be deleted.
            self._write_directory_props(dir_path, 'PROPS-END\n')
            if not Ctx().keep_cvsignore:
                return

        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),)
            )

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Emit the copy of SRC_LOD, as of SRC_REVNUM, to DEST_LOD."""

        # Register the main LOD directory, and create parent directories
        # as needed.  The LOD directory itself is not created here
        # because the copy below adds it:
        self._register_basic_directory(dest_lod.get_path(), False)

        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (self._utf8_path(dest_lod.get_path()),
               src_revnum, self._utf8_path(src_lod.get_path()))
            )

    def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
        """Emit the copy of CVS_PATH from SRC_LOD to DEST_LOD.

        The source is taken as of revision SRC_REVNUM.  CVS_PATH must be
        a CVSFile or a CVSDirectory; otherwise InternalError is raised.
        A '.cvsignore' file is not copied unless Ctx().keep_cvsignore is
        set."""

        if isinstance(cvs_path, CVSFile):
            node_kind = 'file'
            if cvs_path.rcs_basename == '.cvsignore':
                # FIXME: Here we have to adjust the containing directory's
                # svn:ignore property to reflect the addition of the
                # .cvsignore file to the LOD!  This is awkward because we
                # don't have the contents of the .cvsignore file available.
                if not Ctx().keep_cvsignore:
                    return
        elif isinstance(cvs_path, CVSDirectory):
            node_kind = 'dir'
        else:
            raise InternalError()

        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (
                self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
                node_kind,
                src_revnum,
                self._utf8_path(src_lod.get_path(cvs_path.cvs_path))
                )
            )

    def finish(self):
        """Perform any cleanup necessary after all revisions have been
        committed.

        This closes the dumpfile."""

        self._dumpfile.close()
def generate_ignores(raw_ignore_val):
    """Return the list of ignore patterns found in RAW_IGNORE_VAL.

    RAW_IGNORE_VAL is the text of a .cvsignore file.  It is split on
    whitespace, and the resulting patterns are returned in order.  A
    pattern consisting of a single '!' discards every pattern that
    precedes it."""

    ignore_vals = []
    for ignore in raw_ignore_val.split():
        # Reset the list if we encounter a '!'
        # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
        if ignore == '!':
            ignore_vals = []
        else:
            ignore_vals.append(ignore)
    return ignore_vals
class LoaderPipe(object):
    """A file-like object that writes to 'svnadmin load'.

    Some error checking and reporting are done when writing."""

    def __init__(self, target):
        """Start 'svnadmin load -q TARGET' as a subprocess.

        The executable is taken from Ctx().svnadmin_executable.  The
        standard input, output, and error streams of the subprocess are
        all connected to pipes."""

        self.loader_pipe = subprocess.Popen(
            [Ctx().svnadmin_executable, 'load', '-q', target],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            )
        # The standard output of the subprocess is never read, so our
        # end of that pipe is closed right away.
        self.loader_pipe.stdout.close()

    def write(self, s):
        """Write S to the standard input of the subprocess.

        If the write fails with IOError, raise a FatalError that
        includes whatever the subprocess wrote to its standard error."""

        try:
            self.loader_pipe.stdin.write(s)
        except IOError:
            raise FatalError(
                'svnadmin failed with the following output while '
                'loading the dumpfile:\n%s'
                % (self.loader_pipe.stderr.read(),)
                )

    def close(self):
        """Close the pipe and wait for the subprocess to finish.

        Raise CommandError, including the standard error output of the
        subprocess, if it exits with a nonzero status.  The loader_pipe
        attribute is deleted, so the instance cannot be used again."""

        self.loader_pipe.stdin.close()
        error_output = self.loader_pipe.stderr.read()
        # Everything has been read; release the pipe explicitly rather
        # than leaving it to garbage collection.
        self.loader_pipe.stderr.close()
        exit_status = self.loader_pipe.wait()
        del self.loader_pipe
        if exit_status:
            raise CommandError('svnadmin load', exit_status, error_output)