Move definitions of generate_ignores() higher in the file.
[cvs2svn.git] / cvs2svn_lib / svn_dump.py
blob1f71b68ec19f33413384f326ad377fd14baa38d1
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2010 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains code to output to Subversion dumpfile format."""
20 import subprocess
22 try:
23 from hashlib import md5
24 except ImportError:
25 from md5 import new as md5
27 from cvs2svn_lib.common import CommandError
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import path_split
31 from cvs2svn_lib.context import Ctx
32 from cvs2svn_lib.cvs_path import CVSDirectory
33 from cvs2svn_lib.cvs_path import CVSFile
34 from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
# Things that can happen to a file.  The value is written verbatim as
# the 'Node-action:' header of the file's node record in the dumpfile.
OP_ADD = 'add'
OP_CHANGE = 'change'
def utf8_path(path):
    """Return PATH re-encoded as UTF-8.

    PATH is decoded with the filename decoder found on the run-time
    context (Ctx().cvs_filename_decoder) and the decoded value is then
    encoded as UTF-8.  A UnicodeError raised by either step is turned
    into a FatalError that suggests adjusting the encoding options."""

    try:
        decoder = Ctx().cvs_filename_decoder
        decoded = decoder.decode_path(path)
        return decoded.encode('utf8')
    except UnicodeError:
        message = (
            "Unable to convert a path '%s' to internal encoding.\n"
            "Consider rerunning with one or more '--encoding' parameters or\n"
            "with '--fallback-encoding'."
            % (path,)
            )
        raise FatalError(message)
def generate_ignores(raw_ignore_val):
    """Return the list of ignore patterns contained in RAW_IGNORE_VAL.

    RAW_IGNORE_VAL is a string of whitespace-separated patterns (this
    module passes in the text of a '.cvsignore' file).  A token that
    is exactly '!' discards every pattern listed before it; see
    http://cvsbook.red-bean.com/cvsbook.html#cvsignore"""

    tokens = raw_ignore_val.split()

    # Every '!' resets the list, so only the tokens following the last
    # '!' can end up in the result.
    first_kept = 0
    for (index, token) in enumerate(tokens):
        if token == '!':
            first_kept = index + 1

    return tokens[first_kept:]
class DumpstreamDelegate(SVNRepositoryDelegate):
    """Repository delegate that serializes every change it is told about
    as records of a Subversion dumpfile, written to a file-like object."""

    def __init__(self, revision_reader, dumpfile):
        """Set up the delegate and write the dumpfile header.

        REVISION_READER is the object whose get_content() method is asked
        for the text of each file revision.

        DUMPFILE should be a file-like object opened in binary mode; the
        dump stream is written to it.  Only its write() and close()
        methods are ever called."""

        self._revision_reader = revision_reader
        self._dumpfile = dumpfile
        self._write_dumpfile_header()

        # SVN paths of the basic project infrastructure directories that
        # have been created so far: all of the LOD paths plus all of
        # their parent directories.  The root directory ('') counts as
        # present from the start.
        self._basic_directories = set([''])

    def _write_dumpfile_header(self):
        """Write the standard header that starts a dumpfile.

        No UUID is written: the CVS repository does not have one, and
        the Subversion repository will be created with its own anyway."""

        format_header = 'SVN-fs-dump-format-version: 2\n\n'
        self._dumpfile.write(format_header)

    @staticmethod
    def _string_for_props(properties):
        """Return PROPERTIES serialized as a dumpfile property section.

        PROPERTIES maps property names to values.  Entries are emitted
        in sorted order as 'K <len>/name/V <len>/value' groups and the
        section is closed with 'PROPS-END'.  Two kinds of entries are
        left out: names starting with '_' (internal use only) and
        entries whose value is None (property should be left unset)."""

        chunks = [
            'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)
            for (name, value) in sorted(properties.iteritems())
            if not name.startswith('_') and value is not None
            ]
        chunks.append('PROPS-END\n')
        return ''.join(chunks)

    def start_commit(self, revnum, revprops):
        """Emit the revision record that opens revision REVNUM.

        REVPROPS maps revision property names to values; it is
        serialized with _string_for_props()."""

        # A revision record typically looks like this:
        #
        #     Revision-number: 1
        #     Prop-content-length: 129
        #     Content-length: 129
        #
        #     K 7
        #     svn:log
        #     V 27
        #     Log message for revision 1.
        #     K 10
        #     svn:author
        #     V 7
        #     jrandom
        #     K 8
        #     svn:date
        #     V 27
        #     2003-04-22T22:57:58.132837Z
        #     PROPS-END
        #
        # The length headers cover the whole property section: not only
        # the names and values but also the 'K <n>' / 'V <n>' lines that
        # carry their lengths.
        #
        # Prop-content-length counts the properties only, while
        # Content-length counts everything in the record body.  That
        # pair of headers is the generic form for any dumpfile entity;
        # a revision has nothing but properties, so for revisions the
        # two numbers always coincide.

        props_text = self._string_for_props(revprops)
        props_len = len(props_text)

        record = (
            'Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            '\n'
            % (revnum, props_len, props_len, props_text)
            )
        self._dumpfile.write(record)

    def end_commit(self):
        """Do nothing; a revision needs no closing record."""

    def _make_any_dir(self, path):
        """Emit the node record that adds directory PATH."""

        record = (
            "Node-path: %s\n"
            "Node-kind: dir\n"
            "Node-action: add\n"
            "\n"
            "\n"
            % utf8_path(path)
            )
        self._dumpfile.write(record)

    def _register_basic_directory(self, path, create):
        """Record PATH as an existing basic directory, unless already known.

        Parent directories that are not known yet are created first.  If
        CREATE is true, PATH itself is created as well; otherwise it is
        only recorded.  Use this only for LOD paths and the directories
        that contain them, never for directories inside an LOD."""

        if path in self._basic_directories:
            return

        # The parent has to exist before PATH can:
        parent_path = path_split(path)[0]
        self._register_basic_directory(parent_path, True)

        if create:
            self._make_any_dir(path)
        self._basic_directories.add(path)

    def initialize_project(self, project):
        """Create the initial directories of PROJECT.

        The directories reported by PROJECT.get_initial_directories()
        are created, together with any missing parents.  Directories
        that already exist (for example because paths are shared within
        or across projects) are not created a second time."""

        for initial_dir in project.get_initial_directories():
            self._register_basic_directory(initial_dir, True)

    def initialize_lod(self, lod):
        """Create the directory of LOD unless its path is empty."""

        lod_path = lod.get_path()
        if not lod_path:
            return
        self._register_basic_directory(lod_path, True)

    def mkdir(self, lod, cvs_directory):
        """Emit the creation of CVS_DIRECTORY within LOD."""

        svn_path = lod.get_path(cvs_directory.cvs_path)
        self._make_any_dir(svn_path)

    def _add_or_change_path(self, cvs_rev, op):
        """Emit the file node record for CVS_REV.

        OP must be OP_ADD or OP_CHANGE and becomes the node action.  If
        the file is named '.cvsignore', a record setting 'svn:ignore'
        on the containing directory is emitted first, and the file
        record itself follows only when Ctx().keep_cvsignore is set."""

        assert op in (OP_ADD, OP_CHANGE)

        # The property handling relies on an undocumented but consistent
        # trait of the Subversion dumpfile loader: if a node record does
        # not mention properties at all (no "Prop-content-length:"
        # header, no property list, no "PROPS-END\n" line), the node's
        # properties are left exactly as they were.
        #
        # Text content behaves the same way.  A record that changes
        # *only* the properties of a node carries neither
        # "Text-content-length:" nor "Text-content-md5:" and no text, yet
        # the file's text is not erased; it simply stays unchanged.
        #
        # This suits cvs2svn well:
        #
        # For files, most properties are set in the first revision and
        # never touched again (the one exception is 'cvs2svn:cvs-rev').
        # Unless 'cvs2svn:cvs-rev' is in use, the full property set of a
        # file need not be remembered once it has been written.
        #
        # For directories, the only property set is "svn:ignore".  It
        # may change after the first revision, but it is always derived
        # from the contents of a ".cvsignore" file, so CVS does the
        # remembering and the previous value need not be kept here.

        # Property section (sorted by name) and its header, if any:
        svn_props = cvs_rev.get_properties()
        prop_contents = ''
        props_header = ''
        if cvs_rev.properties_changed:
            prop_contents = self._string_for_props(svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)

        data = self._revision_reader.get_content(cvs_rev)

        # A .cvsignore file is turned into a property of its directory:
        dir_path, basename = path_split(cvs_rev.get_svn_path())
        if basename == '.cvsignore':
            ignore_value = ''.join(
                [pattern + '\n' for pattern in generate_ignores(data)]
                )
            ignore_contents = self._string_for_props(
                {'svn:ignore' : ignore_value}
                )
            ignore_len = len(ignore_contents)

            # Headers first, then the property section:
            dir_record = (
                'Node-path: %s\n'
                'Node-kind: dir\n'
                'Node-action: change\n'
                'Prop-content-length: %d\n'
                'Content-length: %d\n'
                '\n'
                '%s'
                % (utf8_path(dir_path),
                   ignore_len, ignore_len, ignore_contents)
                )
            self._dumpfile.write(dir_record)

            if not Ctx().keep_cvsignore:
                return

        text_md5 = md5(data).hexdigest()

        # Content-length covers the property section plus the text:
        node_header = (
            'Node-path: %s\n'
            'Node-kind: file\n'
            'Node-action: %s\n'
            '%s'  # stays empty when the properties are left untouched
            'Text-content-length: %d\n'
            'Text-content-md5: %s\n'
            'Content-length: %d\n'
            '\n'
            % (utf8_path(cvs_rev.get_svn_path()), op, props_header,
               len(data), text_md5, len(data) + len(prop_contents))
            )
        self._dumpfile.write(node_header)

        if prop_contents:
            self._dumpfile.write(prop_contents)

        self._dumpfile.write(data)

        # End of record.  Two newlines: the first terminates content
        # that may not end in a newline itself, the second leaves a
        # blank line for readability.
        self._dumpfile.write('\n\n')

    def add_path(self, cvs_rev):
        """Emit the addition described by CVS_REV, a CVSRevisionAdd."""

        self._add_or_change_path(cvs_rev, OP_ADD)

    def change_path(self, cvs_rev):
        """Emit the change described by CVS_REV, a CVSRevisionChange."""

        self._add_or_change_path(cvs_rev, OP_CHANGE)

    def delete_lod(self, lod):
        """Emit the deletion of LOD and forget its directory."""

        record = (
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (utf8_path(lod.get_path()),)
            )
        self._dumpfile.write(record)
        self._basic_directories.remove(lod.get_path())

    def delete_path(self, lod, cvs_path):
        """Emit the deletion of CVS_PATH from LOD.

        Deleting a '.cvsignore' file first clears the properties of the
        containing directory; the file's own delete record is then
        emitted only when Ctx().keep_cvsignore is set."""

        # NOTE(review): get_cvs_path() is used here while the rest of
        # the class reads the cvs_path attribute; presumably the two
        # are equivalent -- confirm against CVSPath.
        svn_path = lod.get_path(cvs_path.get_cvs_path())
        dir_path, basename = path_split(svn_path)
        if basename == '.cvsignore':
            # An empty property section removes the directory's
            # svn:ignore property along with the .cvsignore file.
            ignore_contents = 'PROPS-END\n'
            ignore_len = len(ignore_contents)

            # Headers first, then the property section:
            dir_record = (
                'Node-path: %s\n'
                'Node-kind: dir\n'
                'Node-action: change\n'
                'Prop-content-length: %d\n'
                'Content-length: %d\n'
                '\n'
                '%s'
                % (utf8_path(dir_path),
                   ignore_len, ignore_len, ignore_contents)
                )
            self._dumpfile.write(dir_record)

            if not Ctx().keep_cvsignore:
                return

        delete_record = (
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (utf8_path(lod.get_path(cvs_path.cvs_path)),)
            )
        self._dumpfile.write(delete_record)

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Emit the copy of SRC_LOD, as of SRC_REVNUM, to DEST_LOD."""

        # Record the main LOD directory without creating it (the copy
        # below does that), but do create missing parent directories:
        self._register_basic_directory(dest_lod.get_path(), False)

        record = (
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (utf8_path(dest_lod.get_path()),
               src_revnum, utf8_path(src_lod.get_path()))
            )
        self._dumpfile.write(record)

    def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
        """Emit the copy of CVS_PATH from SRC_LOD (at SRC_REVNUM) to DEST_LOD.

        CVS_PATH must be a CVSFile or a CVSDirectory; anything else
        raises InternalError.  A '.cvsignore' file is skipped unless
        Ctx().keep_cvsignore is set."""

        if isinstance(cvs_path, CVSFile):
            # FIXME: For a .cvsignore file the svn:ignore property of
            # the containing directory would have to be adjusted here
            # to reflect the addition of the file to the LOD.  That is
            # awkward because the contents of the .cvsignore file are
            # not available at this point.
            if (cvs_path.rcs_basename == '.cvsignore'
                    and not Ctx().keep_cvsignore):
                return
            node_kind = 'file'
        elif isinstance(cvs_path, CVSDirectory):
            node_kind = 'dir'
        else:
            raise InternalError()

        dest_path = utf8_path(dest_lod.get_path(cvs_path.cvs_path))
        src_path = utf8_path(src_lod.get_path(cvs_path.cvs_path))

        record = (
            'Node-path: %s\n'
            'Node-kind: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (dest_path, node_kind, src_revnum, src_path)
            )
        self._dumpfile.write(record)

    def finish(self):
        """Close the dumpfile once all revisions have been committed."""

        self._dumpfile.close()
class LoaderPipe(object):
    """File-like object that feeds everything written to it into an
    'svnadmin load' process.

    Failures of the process are detected and reported both while
    writing and when the pipe is closed."""

    def __init__(self, target):
        """Start 'svnadmin load -q TARGET' with its stdin held open.

        The executable is taken from Ctx().svnadmin_executable.  The
        standard output of the process is closed immediately; its
        standard error is kept so that it can be shown on failure."""

        command = [Ctx().svnadmin_executable, 'load', '-q', target]
        self.loader_pipe = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            )
        self.loader_pipe.stdout.close()

    def write(self, s):
        """Send S to the loader process.

        If the write fails with IOError, raise a FatalError carrying
        whatever the process printed on standard error."""

        try:
            self.loader_pipe.stdin.write(s)
        except IOError:
            svnadmin_output = self.loader_pipe.stderr.read()
            raise FatalError(
                'svnadmin failed with the following output while '
                'loading the dumpfile:\n%s'
                % (svnadmin_output,)
                )

    def close(self):
        """Finish the load and check how the process ended.

        Standard input is closed, standard error is read to the end and
        the process is waited for.  A non-zero exit status raises
        CommandError with that status and the collected error output."""

        process = self.loader_pipe
        process.stdin.close()
        error_output = process.stderr.read()
        exit_status = process.wait()
        del self.loader_pipe

        if exit_status:
            raise CommandError('svnadmin load', exit_status, error_output)