Implement a method CVSTextDecoder.decode_path().
[cvs2svn.git] / cvs2svn_lib / svn_dump.py
blobf297b47204822c0e50df968374de888e247864d3
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2010 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains code to output to Subversion dumpfile format."""
20 import subprocess
22 try:
23 from hashlib import md5
24 except ImportError:
25 from md5 import new as md5
27 from cvs2svn_lib.common import CommandError
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import path_split
31 from cvs2svn_lib.context import Ctx
32 from cvs2svn_lib.cvs_path import CVSDirectory
33 from cvs2svn_lib.cvs_path import CVSFile
34 from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
# Things that can happen to a file.  These values are written verbatim
# into the "Node-action:" header of a dumpfile node record.
OP_ADD = 'add'
OP_CHANGE = 'change'
class DumpstreamDelegate(SVNRepositoryDelegate):
    """Write output in Subversion dumpfile format."""

    def __init__(self, revision_reader, dumpfile):
        """Return a new DumpstreamDelegate instance.

        REVISION_READER is the object whose get_content() method is used
        to obtain the text of each CVS revision that is written.

        DUMPFILE should be a file-like object opened in binary mode, to
        which the dump stream will be written.  The only methods called
        on the object are write() and close().

        The dumpfile header is written immediately."""

        self._revision_reader = revision_reader
        self._dumpfile = dumpfile
        self._write_dumpfile_header()

        # A set of the basic project infrastructure project directories
        # that have been created so far, as SVN paths.  (The root
        # directory is considered to be present at initialization.)  This
        # includes all of the LOD paths, and all of their parent
        # directories etc.
        self._basic_directories = set([''])

    def _write_dumpfile_header(self):
        """Initialize the dumpfile with the standard headers.

        Since the CVS repository doesn't have a UUID, and the Subversion
        repository will be created with one anyway, we don't specify a
        UUID in the dumpfile."""

        self._dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    def _utf8_path(self, path):
        """Return a copy of PATH encoded in UTF-8.

        PATH is decoded with the context's cvs_filename_decoder and then
        re-encoded as UTF-8.  Raise FatalError if either step fails with
        a UnicodeError."""

        try:
            return Ctx().cvs_filename_decoder.decode_path(path).encode('utf8')
        except UnicodeError:
            raise FatalError(
                "Unable to convert a path '%s' to internal encoding.\n"
                "Consider rerunning with one or more '--encoding' parameters or\n"
                "with '--fallback-encoding'."
                % (path,))

    @staticmethod
    def _string_for_props(properties):
        """Return PROPERTIES in the form needed for the dumpfile.

        PROPERTIES is a mapping from property name to value.  Entries
        are emitted sorted by name as 'K <len>/name/V <len>/value'
        records, followed by a terminating 'PROPS-END' line.  Entries
        whose name starts with '_' or whose value is None are skipped."""

        prop_strings = []
        # items() rather than iteritems(): sorted() materializes the
        # pairs either way, and items() exists on every mapping type.
        for (k, v) in sorted(properties.items()):
            if k.startswith('_'):
                # Such properties are for internal use only.
                continue
            if v is None:
                # None indicates that the property should be left unset.
                continue
            prop_strings.append(
                'K %d\n%s\nV %d\n%s\n' % (len(k), k, len(v), v)
            )

        prop_strings.append('PROPS-END\n')

        return ''.join(prop_strings)

    def start_commit(self, revnum, revprops):
        """Emit the start of SVN_COMMIT (an SVNCommit).

        REVNUM is the revision number written to the 'Revision-number:'
        header and REVPROPS is the mapping of revision properties."""

        # The start of a new commit typically looks like this:
        #
        #   Revision-number: 1
        #   Prop-content-length: 129
        #   Content-length: 129
        #
        #   K 7
        #   svn:log
        #   V 27
        #   Log message for revision 1.
        #   K 10
        #   svn:author
        #   V 7
        #   jrandom
        #   K 8
        #   svn:date
        #   V 27
        #   2003-04-22T22:57:58.132837Z
        #   PROPS-END
        #
        # Notice that the length headers count everything -- not just the
        # length of the data but also the lengths of the lengths, including
        # the 'K ' or 'V ' prefixes.
        #
        # The reason there are both Prop-content-length and Content-length
        # is that the former includes just props, while the latter includes
        # everything.  That's the generic header form for any entity in a
        # dumpfile.  But since revisions only have props, the two lengths
        # are always the same for revisions.

        # Calculate the output needed for the property definitions.
        all_prop_strings = self._string_for_props(revprops)
        total_len = len(all_prop_strings)

        # Print the revision header and revprops
        self._dumpfile.write(
            'Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            '\n'
            % (revnum, total_len, total_len, all_prop_strings)
        )

    def end_commit(self):
        """Do nothing; a revision record needs no explicit terminator."""
        pass

    def _make_any_dir(self, path):
        """Emit the creation of directory PATH."""

        self._dumpfile.write(
            "Node-path: %s\n"
            "Node-kind: dir\n"
            "Node-action: add\n"
            "\n"
            "\n"
            % self._utf8_path(path)
        )

    def _write_dir_prop_change(self, dir_path, prop_contents):
        """Emit a property change for the directory DIR_PATH.

        PROP_CONTENTS is the complete, already-formatted property block
        (ending in 'PROPS-END'); it replaces the directory's properties.
        Used to keep a directory's svn:ignore in step with its
        .cvsignore file."""

        prop_len = len(prop_contents)

        # write headers, then props.  The record carries only
        # properties, so both length headers have the same value.
        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: change\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            % (self._utf8_path(dir_path), prop_len, prop_len, prop_contents)
        )

    def _register_basic_directory(self, path, create):
        """Register the creation of PATH if it is not already there.

        Create any parent directories that do not already exist.  If
        CREATE is set, also create PATH if it doesn't already exist.  This
        method should only be used for the LOD paths and the directories
        containing them, not for directories within an LOD path."""

        if path not in self._basic_directories:
            # Make sure that the parent directory is present.  The
            # recursion stops at the root (''), which is always registered.
            self._register_basic_directory(path_split(path)[0], True)
            if create:
                self._make_any_dir(path)
            self._basic_directories.add(path)

    def initialize_project(self, project):
        """Create any initial directories for the project.

        The trunk, tags, and branches directories are created the first
        time the project is seen.  Be sure not to create parent
        directories that already exist (e.g., because two directories
        share part of their paths either within or across projects)."""

        for path in project.get_initial_directories():
            self._register_basic_directory(path, True)

    def initialize_lod(self, lod):
        """Create the directory for LOD (and its parents) if needed.

        Nothing is done when the LOD's path is empty."""

        lod_path = lod.get_path()
        if lod_path:
            self._register_basic_directory(lod_path, True)

    def mkdir(self, lod, cvs_directory):
        """Emit the creation of CVS_DIRECTORY within LOD."""

        self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

    def _add_or_change_path(self, cvs_rev, op):
        """Emit the addition or change corresponding to CVS_REV.

        OP is either the constant OP_ADD or OP_CHANGE.

        If the file is named '.cvsignore', its contents are first written
        as the 'svn:ignore' property of the containing directory; the
        file itself is then written only if Ctx().keep_cvsignore is
        set."""

        assert op in [OP_ADD, OP_CHANGE]

        # The property handling here takes advantage of an undocumented
        # but IMHO consistent feature of the Subversion dumpfile-loading
        # code.  When a node's properties aren't mentioned (that is, the
        # "Prop-content-length:" header is absent, no properties are
        # listed at all, and there is no "PROPS-END\n" line) then no
        # change is made to the node's properties.
        #
        # This is consistent with the way dumpfiles behave w.r.t. text
        # content changes, so I'm comfortable relying on it.  If you
        # commit a change to *just* the properties of some node that
        # already has text contents from a previous revision, then in the
        # dumpfile output for the prop change, no "Text-content-length:"
        # nor "Text-content-md5:" header will be present, and the text of
        # the file will not be given.  But this does not cause the file's
        # text to be erased!  It simply remains unchanged.
        #
        # This works out great for cvs2svn, due to lucky coincidences:
        #
        # For files, we set most properties in the first revision and
        # never change them.  (The only exception is the 'cvs2svn:cvs-rev'
        # property.)  If 'cvs2svn:cvs-rev' is not being used, then there
        # is no need to remember the full set of properties on a given
        # file once we've set it.
        #
        # For directories, the only property we set is "svn:ignore", and
        # while we may change it after the first revision, we always do so
        # based on the contents of a ".cvsignore" file -- in other words,
        # CVS is doing the remembering for us, so we still don't have to
        # preserve the previous value of the property ourselves.

        # Calculate the (sorted-by-name) property string and length, if any.
        svn_props = cvs_rev.get_properties()
        if cvs_rev.properties_changed:
            prop_contents = self._string_for_props(svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)
        else:
            prop_contents = ''
            props_header = ''

        data = self._revision_reader.get_content(cvs_rev)

        # treat .cvsignore as a directory property
        dir_path, basename = path_split(cvs_rev.get_svn_path())
        if basename == '.cvsignore':
            patterns = generate_ignores(data)
            ignore_contents = self._string_for_props({
                'svn:ignore': ''.join((s + '\n') for s in patterns),
            })
            self._write_dir_prop_change(dir_path, ignore_contents)
            if not Ctx().keep_cvsignore:
                return

        checksum = md5()
        checksum.update(data)

        # The content length is the length of the property data, the text
        # data, and any metadata around or inside them:
        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: file\n'
            'Node-action: %s\n'
            '%s'  # no property header if no props
            'Text-content-length: %d\n'
            'Text-content-md5: %s\n'
            'Content-length: %d\n'
            '\n' % (
                self._utf8_path(cvs_rev.get_svn_path()), op, props_header,
                len(data), checksum.hexdigest(),
                len(data) + len(prop_contents),
            )
        )

        if prop_contents:
            self._dumpfile.write(prop_contents)

        self._dumpfile.write(data)

        # This record is done (write two newlines -- one to terminate
        # contents that weren't themselves newline-terminated, one to
        # provide a blank line for readability).
        self._dumpfile.write('\n\n')

    def add_path(self, cvs_rev):
        """Emit the addition corresponding to CVS_REV, a CVSRevisionAdd."""

        self._add_or_change_path(cvs_rev, OP_ADD)

    def change_path(self, cvs_rev):
        """Emit the change corresponding to CVS_REV, a CVSRevisionChange."""

        self._add_or_change_path(cvs_rev, OP_CHANGE)

    def delete_lod(self, lod):
        """Emit the deletion of LOD.

        The LOD's path is also removed from the set of registered basic
        directories; a KeyError results if it was never registered."""

        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path()),)
        )
        self._basic_directories.remove(lod.get_path())

    def delete_path(self, lod, cvs_path):
        """Emit the deletion of CVS_PATH from LOD.

        If the path is a '.cvsignore' file, the containing directory's
        properties are cleared first; the file deletion itself is then
        emitted only if Ctx().keep_cvsignore is set."""

        dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path()))
        if basename == '.cvsignore':
            # When a .cvsignore file is deleted, the directory's svn:ignore
            # property needs to be deleted.  An empty property block does
            # that.
            self._write_dir_prop_change(dir_path, 'PROPS-END\n')
            if not Ctx().keep_cvsignore:
                return

        # NOTE(review): this uses the cvs_path attribute whereas the
        # lookup above calls get_cvs_path(); presumably both yield the
        # same path -- confirm against CVSPath.
        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),)
        )

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Emit a copy of SRC_LOD at revision SRC_REVNUM to DEST_LOD."""

        # Register the main LOD directory, and create parent directories
        # as needed.  The directory itself is not created here because the
        # copy record below adds it.
        self._register_basic_directory(dest_lod.get_path(), False)

        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (self._utf8_path(dest_lod.get_path()),
               src_revnum, self._utf8_path(src_lod.get_path()))
        )

    def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
        """Emit a copy of CVS_PATH from SRC_LOD@SRC_REVNUM into DEST_LOD.

        CVS_PATH must be a CVSFile or a CVSDirectory; anything else
        raises InternalError.  A '.cvsignore' file is copied only if
        Ctx().keep_cvsignore is set."""

        if isinstance(cvs_path, CVSFile):
            node_kind = 'file'
            if cvs_path.rcs_basename == '.cvsignore':
                # FIXME: Here we have to adjust the containing directory's
                # svn:ignore property to reflect the addition of the
                # .cvsignore file to the LOD!  This is awkward because we
                # don't have the contents of the .cvsignore file available.
                if not Ctx().keep_cvsignore:
                    return
        elif isinstance(cvs_path, CVSDirectory):
            node_kind = 'dir'
        else:
            raise InternalError()

        self._dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (
                self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
                node_kind,
                src_revnum,
                self._utf8_path(src_lod.get_path(cvs_path.cvs_path))
            )
        )

    def finish(self):
        """Perform any cleanup necessary after all revisions have been
        committed.

        This closes the dumpfile."""

        self._dumpfile.close()
def generate_ignores(raw_ignore_val):
    """Return the list of ignore patterns defined by RAW_IGNORE_VAL.

    RAW_IGNORE_VAL is the text of a .cvsignore file.  It is split on
    arbitrary whitespace, and the patterns are returned in the order in
    which they appear.  A lone '!' entry discards every pattern seen
    before it."""

    ignore_vals = []
    for ignore in raw_ignore_val.split():
        # Reset the list if we encounter a '!'
        # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
        if ignore == '!':
            ignore_vals = []
        else:
            ignore_vals.append(ignore)
    return ignore_vals
class LoaderPipe(object):
    """A file-like object that writes to 'svnadmin load'.

    Some error checking and reporting are done when writing."""

    def __init__(self, target):
        """Start 'svnadmin load -q TARGET' with a pipe to its stdin.

        The executable is taken from Ctx().svnadmin_executable.  The
        child's stdout is closed immediately; its stderr is kept open so
        that it can be reported if loading fails."""

        self.loader_pipe = subprocess.Popen(
            [Ctx().svnadmin_executable, 'load', '-q', target],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        self.loader_pipe.stdout.close()

    def write(self, s):
        """Write S to the loader's standard input.

        If the write fails with IOError, raise a FatalError that
        includes whatever the loader wrote to its stderr."""

        try:
            self.loader_pipe.stdin.write(s)
        except IOError:
            raise FatalError(
                'svnadmin failed with the following output while '
                'loading the dumpfile:\n%s'
                % (self.loader_pipe.stderr.read(),)
            )

    def close(self):
        """Close the pipe and wait for the loader to exit.

        Raise CommandError, including the loader's stderr output, if it
        exits with a nonzero status.  The instance cannot be used after
        this call."""

        # Closing stdin signals end-of-input; stderr is drained before
        # waiting so the child cannot block on a full stderr pipe.
        self.loader_pipe.stdin.close()
        error_output = self.loader_pipe.stderr.read()
        exit_status = self.loader_pipe.wait()
        del self.loader_pipe
        if exit_status:
            raise CommandError('svnadmin load', exit_status, error_output)