Mention that FilterSymbolsPass also calls the RevisionCollector callbacks.
[cvs2svn.git] / cvs2svn_lib / dumpfile_delegate.py
blob080e45433f86caa7d8b2fe1e9f4bda9e6052c21c
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains the DumpfileDelegate class, which writes a Subversion dumpfile."""
20 try:
21 from hashlib import md5
22 except ImportError:
23 from md5 import new as md5
26 from cvs2svn_lib import config
27 from cvs2svn_lib.common import FatalError
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.common import canonicalize_eol
30 from cvs2svn_lib.common import path_split
31 from cvs2svn_lib.context import Ctx
32 from cvs2svn_lib.cvs_path import CVSDirectory
33 from cvs2svn_lib.cvs_path import CVSFile
34 from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
35 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single
# Things that can happen to a file.  The values are written verbatim
# into the "Node-action:" header of the corresponding dumpfile record.
OP_ADD = 'add'
OP_CHANGE = 'change'
class DumpfileDelegate(SVNRepositoryDelegate):
    """Create a Subversion dumpfile.

    Each callback appends the corresponding record(s) to the dumpfile
    that is opened by the constructor; finish() closes the file."""

    def __init__(self, revision_reader, dumpfile_path):
        """Return a new DumpfileDelegate instance, attached to a dumpfile
        DUMPFILE_PATH, using Ctx().cvs_filename_decoder().

        REVISION_READER is the object whose get_content() method is used
        to obtain the text of each CVS revision.  The dumpfile is opened
        for binary writing and the dumpfile header is written
        immediately."""

        self._revision_reader = revision_reader
        self.dumpfile_path = dumpfile_path

        self.dumpfile = open(self.dumpfile_path, 'wb')
        self._write_dumpfile_header(self.dumpfile)

        # A set of the basic project infrastructure project directories
        # that have been created so far, as SVN paths.  (The root
        # directory is considered to be present at initialization.)  This
        # includes all of the LOD paths, and all of their parent
        # directories etc.
        self._basic_directories = set([''])

    def _write_dumpfile_header(self, dumpfile):
        """Initialize DUMPFILE with the standard headers."""

        # Since the CVS repository doesn't have a UUID, and the Subversion
        # repository will be created with one anyway, we don't specify a
        # UUID in the dumpfile.
        dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    def _utf8_path(self, path):
        """Return a copy of PATH encoded in UTF-8.

        Raise FatalError if any component of PATH cannot be converted."""

        # Convert each path component separately (as they may each use
        # different encodings).
        try:
            return '/'.join([
                Ctx().cvs_filename_decoder(piece).encode('utf8')
                for piece in path.split('/')
                ])
        except UnicodeError:
            raise FatalError(
                "Unable to convert a path '%s' to internal encoding.\n"
                "Consider rerunning with one or more '--encoding' parameters or\n"
                "with '--fallback-encoding'."
                % (path,))

    @staticmethod
    def _string_for_props(properties):
        """Return PROPERTIES in the form needed for the dumpfile.

        PROPERTIES is a mapping from property name to value.  The
        properties are emitted sorted by name, each as a 'K'/'V' pair,
        followed by a 'PROPS-END' line."""

        prop_strings = []
        for (k, v) in sorted(properties.iteritems()):
            if k.startswith('_'):
                # Such properties are for internal use only.
                pass
            elif v is None:
                # None indicates that the property should be left unset.
                pass
            else:
                prop_strings.append(
                    'K %d\n%s\nV %d\n%s\n' % (len(k), k, len(v), v)
                    )

        prop_strings.append('PROPS-END\n')

        return ''.join(prop_strings)

    def start_commit(self, revnum, revprops):
        """Emit the start of revision REVNUM.

        REVPROPS is a mapping holding the revision properties; it is
        serialized with _string_for_props().  REVNUM is also remembered
        in self.revision."""

        self.revision = revnum

        # The start of a new commit typically looks like this:
        #
        #   Revision-number: 1
        #   Prop-content-length: 129
        #   Content-length: 129
        #
        #   K 7
        #   svn:log
        #   V 27
        #   Log message for revision 1.
        #   K 10
        #   svn:author
        #   V 7
        #   jrandom
        #   K 8
        #   svn:date
        #   V 27
        #   2003-04-22T22:57:58.132837Z
        #   PROPS-END
        #
        # Notice that the length headers count everything -- not just the
        # length of the data but also the lengths of the lengths, including
        # the 'K ' or 'V ' prefixes.
        #
        # The reason there are both Prop-content-length and Content-length
        # is that the former includes just props, while the latter includes
        # everything.  That's the generic header form for any entity in a
        # dumpfile.  But since revisions only have props, the two lengths
        # are always the same for revisions.

        # Calculate the output needed for the property definitions.
        all_prop_strings = self._string_for_props(revprops)
        total_len = len(all_prop_strings)

        # Print the revision header and revprops
        self.dumpfile.write(
            'Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            '\n'
            % (self.revision, total_len, total_len, all_prop_strings)
            )

    def end_commit(self):
        """Do nothing; no record is written at the end of a commit."""

        pass

    def _make_any_dir(self, path):
        """Emit the creation of directory PATH."""

        self.dumpfile.write(
            "Node-path: %s\n"
            "Node-kind: dir\n"
            "Node-action: add\n"
            "\n"
            "\n"
            % self._utf8_path(path)
            )

    def _register_basic_directory(self, path, create):
        """Register the creation of PATH if it is not already there.

        Create any parent directories that do not already exist.  If
        CREATE is set, also create PATH if it doesn't already exist.  This
        method should only be used for the LOD paths and the directories
        containing them, not for directories within an LOD path."""

        if path not in self._basic_directories:
            # Make sure that the parent directory is present.  The
            # recursion terminates at '', which is registered by the
            # constructor.
            self._register_basic_directory(path_split(path)[0], True)
            if create:
                self._make_any_dir(path)
            self._basic_directories.add(path)

    def initialize_project(self, project):
        """Create any initial directories for the project.

        The trunk, tags, and branches directories are created the first
        time the project is seen.  Be sure not to create parent
        directories that already exist (e.g., because two directories
        share part of their paths either within or across projects)."""

        for path in project.get_initial_directories():
            self._register_basic_directory(path, True)

    def initialize_lod(self, lod):
        """Create the directory for LOD, unless its path is empty."""

        lod_path = lod.get_path()
        if lod_path:
            self._register_basic_directory(lod_path, True)

    def mkdir(self, lod, cvs_directory):
        """Emit the creation of CVS_DIRECTORY within LOD."""

        self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

    def _add_or_change_path(self, cvs_rev, op):
        """Emit the addition or change corresponding to CVS_REV.

        OP is either the constant OP_ADD or OP_CHANGE.

        If the file is named '.cvsignore', its contents are first written
        as the 'svn:ignore' property of the containing directory; the
        file itself is then only written if Ctx().keep_cvsignore is
        set."""

        assert op in [OP_ADD, OP_CHANGE]

        # The property handling here takes advantage of an undocumented
        # but IMHO consistent feature of the Subversion dumpfile-loading
        # code.  When a node's properties aren't mentioned (that is, the
        # "Prop-content-length:" header is absent, no properties are
        # listed at all, and there is no "PROPS-END\n" line) then no
        # change is made to the node's properties.
        #
        # This is consistent with the way dumpfiles behave w.r.t. text
        # content changes, so I'm comfortable relying on it.  If you
        # commit a change to *just* the properties of some node that
        # already has text contents from a previous revision, then in the
        # dumpfile output for the prop change, no "Text-content-length:"
        # nor "Text-content-md5:" header will be present, and the text of
        # the file will not be given.  But this does not cause the file's
        # text to be erased!  It simply remains unchanged.
        #
        # This works out great for cvs2svn, due to lucky coincidences:
        #
        # For files, we set most properties in the first revision and
        # never change them.  (The only exception is the 'cvs2svn:cvs-rev'
        # property.)  If 'cvs2svn:cvs-rev' is not being used, then there
        # is no need to remember the full set of properties on a given
        # file once we've set it.
        #
        # For directories, the only property we set is "svn:ignore", and
        # while we may change it after the first revision, we always do so
        # based on the contents of a ".cvsignore" file -- in other words,
        # CVS is doing the remembering for us, so we still don't have to
        # preserve the previous value of the property ourselves.

        # Calculate the (sorted-by-name) property string and length, if any.
        svn_props = cvs_rev.get_properties()
        if cvs_rev.properties_changed:
            prop_contents = self._string_for_props(svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)
        else:
            prop_contents = ''
            props_header = ''

        data = self._revision_reader.get_content(cvs_rev)

        # treat .cvsignore as a directory property
        dir_path, basename = path_split(cvs_rev.get_svn_path())
        if basename == '.cvsignore':
            ignore_value = ''.join(
                (s + '\n') for s in generate_ignores(data)
                )
            ignore_contents = self._string_for_props(
                {'svn:ignore': ignore_value}
                )
            ignore_len = len(ignore_contents)

            # write headers, then props
            self.dumpfile.write(
                'Node-path: %s\n'
                'Node-kind: dir\n'
                'Node-action: change\n'
                'Prop-content-length: %d\n'
                'Content-length: %d\n'
                '\n'
                '%s'
                % (self._utf8_path(dir_path),
                   ignore_len, ignore_len, ignore_contents)
                )
            if not Ctx().keep_cvsignore:
                return

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: file\n'
            'Node-action: %s\n'
            '%s'  # no property header if no props
            % (self._utf8_path(cvs_rev.get_svn_path()), op, props_header)
            )

        # Remember where the content headers start so that they can be
        # overwritten once the real length and checksum are known.
        pos = self.dumpfile.tell()

        # The fields are fixed-width so that the placeholder written now
        # and the final header written later occupy the same number of
        # bytes.
        content_header_fmt = (
            'Text-content-length: %16d\n'
            'Text-content-md5: %32s\n'
            'Content-length: %16d\n'
            '\n'
            )

        self.dumpfile.write(content_header_fmt % (0, '', 0,))

        if prop_contents:
            self.dumpfile.write(prop_contents)

        # Insert the rev contents, calculating length and checksum.
        checksum = md5()
        checksum.update(data)
        length = len(data)
        self.dumpfile.write(data)

        # Go back to overwrite the length and checksum headers with the
        # correct values.  The content length is the length of property
        # data, text data, and any metadata around/inside them:
        self.dumpfile.seek(pos, 0)
        self.dumpfile.write(
            content_header_fmt
            % (length, checksum.hexdigest(), length + len(prop_contents),)
            )

        # Jump back to the end of the stream
        self.dumpfile.seek(0, 2)

        # This record is done (write two newlines -- one to terminate
        # contents that weren't themselves newline-terminated, one to
        # provide a blank line for readability).
        self.dumpfile.write('\n\n')

    def add_path(self, cvs_rev):
        """Emit the addition corresponding to CVS_REV, a CVSRevisionAdd."""

        self._add_or_change_path(cvs_rev, OP_ADD)

    def change_path(self, cvs_rev):
        """Emit the change corresponding to CVS_REV, a CVSRevisionChange."""

        self._add_or_change_path(cvs_rev, OP_CHANGE)

    def delete_lod(self, lod):
        """Emit the deletion of LOD.

        The LOD path is also removed from the set of basic directories;
        it must currently be registered there."""

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path()),)
            )
        self._basic_directories.remove(lod.get_path())

    def delete_path(self, lod, cvs_path):
        """Emit the deletion of CVS_PATH from LOD.

        If the path is a '.cvsignore' file, first clear the properties of
        the containing directory; the file deletion itself is then only
        written if Ctx().keep_cvsignore is set."""

        dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path()))
        if basename == '.cvsignore':
            # When a .cvsignore file is deleted, the directory's svn:ignore
            # property needs to be deleted.
            ignore_contents = 'PROPS-END\n'
            ignore_len = len(ignore_contents)

            # write headers, then props
            self.dumpfile.write(
                'Node-path: %s\n'
                'Node-kind: dir\n'
                'Node-action: change\n'
                'Prop-content-length: %d\n'
                'Content-length: %d\n'
                '\n'
                '%s'
                % (self._utf8_path(dir_path),
                   ignore_len, ignore_len, ignore_contents)
                )
            if not Ctx().keep_cvsignore:
                return

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),)
            )

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Emit the copy of SRC_LOD at revision SRC_REVNUM to DEST_LOD."""

        # Register the main LOD directory, and create parent directories
        # as needed.  The LOD directory itself is not created here
        # because the copy record below adds it.
        self._register_basic_directory(dest_lod.get_path(), False)

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (self._utf8_path(dest_lod.get_path()),
               src_revnum, self._utf8_path(src_lod.get_path()))
            )

    def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
        """Emit the copy of CVS_PATH from SRC_LOD at SRC_REVNUM to DEST_LOD.

        CVS_PATH must be a CVSFile or a CVSDirectory; otherwise
        InternalError is raised.  A '.cvsignore' file is skipped unless
        Ctx().keep_cvsignore is set."""

        if isinstance(cvs_path, CVSFile):
            node_kind = 'file'
            if cvs_path.basename == '.cvsignore':
                # FIXME: Here we have to adjust the containing directory's
                # svn:ignore property to reflect the addition of the
                # .cvsignore file to the LOD!  This is awkward because we
                # don't have the contents of the .cvsignore file available.
                if not Ctx().keep_cvsignore:
                    return
        elif isinstance(cvs_path, CVSDirectory):
            node_kind = 'dir'
        else:
            raise InternalError()

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (
                self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
                node_kind,
                src_revnum,
                self._utf8_path(src_lod.get_path(cvs_path.cvs_path))
                )
            )

    def finish(self):
        """Perform any cleanup necessary after all revisions have been
        committed."""

        self.dumpfile.close()
def generate_ignores(raw_ignore_val):
    """Return the list of ignore patterns contained in RAW_IGNORE_VAL.

    RAW_IGNORE_VAL is the text of a .cvsignore file.  Patterns are
    separated by arbitrary whitespace.  A lone '!' discards every
    pattern that precedes it."""

    ignore_vals = []
    for ignore in raw_ignore_val.split():
        # Reset the list if we encounter a '!'
        # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
        if ignore == '!':
            ignore_vals = []
        else:
            ignore_vals.append(ignore)
    return ignore_vals