Teach RevisionReader.get_content() to handle AppleSingle content.
[cvs2svn.git] / cvs2svn_lib / dumpfile_delegate.py
blobd0eb9c32337e090963d3158b1622a5b6c405fb23
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
"""This module contains the DumpfileDelegate class, which writes a Subversion dumpfile."""
20 try:
21 from hashlib import md5
22 except ImportError:
23 from md5 import new as md5
26 from cvs2svn_lib import config
27 from cvs2svn_lib.common import FatalError
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.common import path_split
30 from cvs2svn_lib.context import Ctx
31 from cvs2svn_lib.cvs_path import CVSDirectory
32 from cvs2svn_lib.cvs_path import CVSFile
33 from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
34 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single
# Things that can happen to a file.  These values are written verbatim
# into the 'Node-action:' header of the dumpfile.
OP_ADD = 'add'
OP_CHANGE = 'change'
# A mapping from the value of the svn:eol-style property to the EOL
# string that should appear in a dumpfile.  Note that 'native' maps to
# a bare LF.
EOL_STYLE_REPLACEMENTS = {
    'LF': '\n',
    'CR': '\r',
    'CRLF': '\r\n',
    'native': '\n',
}
def canonicalize_eol(text, eol):
    """Replace any end-of-line sequences in TEXT with the string EOL.

    CRLF, lone CR and lone LF are all recognized as line endings.  The
    converted text is returned; TEXT itself is not modified."""

    # First fold every kind of line ending to LF.  CRLF has to be handled
    # before lone CR so that it is not turned into two line endings.
    text = text.replace('\r\n', '\n')
    text = text.replace('\r', '\n')

    # Then expand LF to the requested ending, unless it already is LF.
    if eol != '\n':
        text = text.replace('\n', eol)

    return text
class DumpfileDelegate(SVNRepositoryDelegate):
    """Create a Subversion dumpfile.

    The dumpfile is opened by the constructor, the callback methods
    append records to it, and finish() closes it."""

    def __init__(self, revision_reader, dumpfile_path):
        """Return a new DumpfileDelegate instance, attached to a dumpfile
        DUMPFILE_PATH, using Ctx().cvs_filename_decoder().

        REVISION_READER is the object whose get_content() method is used
        to obtain the text of each file revision."""

        self._revision_reader = revision_reader
        self.dumpfile_path = dumpfile_path

        self.dumpfile = open(self.dumpfile_path, 'wb')
        self._write_dumpfile_header(self.dumpfile)

        # A set of the basic project infrastructure project directories
        # that have been created so far, as SVN paths.  (The root
        # directory is considered to be present at initialization.)  This
        # includes all of the LOD paths, and all of their parent
        # directories etc.
        self._basic_directories = set([''])

    def _write_dumpfile_header(self, dumpfile):
        """Write the standard dumpfile headers to DUMPFILE."""

        # Since the CVS repository doesn't have a UUID, and the Subversion
        # repository will be created with one anyway, we don't specify a
        # UUID in the dumpfile.
        dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    def _utf8_path(self, path):
        """Return a copy of PATH encoded in UTF-8.

        Raise FatalError if a component of PATH cannot be converted."""

        # Convert each path component separately (as they may each use
        # different encodings).
        try:
            return '/'.join([
                Ctx().cvs_filename_decoder(piece).encode('utf8')
                for piece in path.split('/')
            ])
        except UnicodeError:
            raise FatalError(
                "Unable to convert a path '%s' to internal encoding.\n"
                "Consider rerunning with one or more '--encoding' parameters or\n"
                "with '--fallback-encoding'."
                % (path,))

    @staticmethod
    def _string_for_props(properties):
        """Return PROPERTIES in the form needed for the dumpfile.

        PROPERTIES maps property names to values.  Entries are emitted
        sorted by name and the result is terminated by a PROPS-END line.
        Names starting with '_' and entries whose value is None are
        omitted."""

        prop_strings = []
        for (k, v) in sorted(properties.items()):
            if k.startswith('_'):
                # Such properties are for internal use only.
                pass
            elif v is None:
                # None indicates that the property should be left unset.
                pass
            else:
                prop_strings.append(
                    'K %d\n%s\nV %d\n%s\n' % (len(k), k, len(v), v)
                )

        prop_strings.append('PROPS-END\n')

        return ''.join(prop_strings)

    def start_commit(self, revnum, revprops):
        """Emit the start of revision REVNUM.

        REVPROPS is a mapping holding the revision properties; it is
        formatted with _string_for_props()."""

        self.revision = revnum

        # The start of a new commit typically looks like this:
        #
        #   Revision-number: 1
        #   Prop-content-length: 129
        #   Content-length: 129
        #
        #   K 7
        #   svn:log
        #   V 27
        #   Log message for revision 1.
        #   K 10
        #   svn:author
        #   V 7
        #   jrandom
        #   K 8
        #   svn:date
        #   V 27
        #   2003-04-22T22:57:58.132837Z
        #   PROPS-END
        #
        # Notice that the length headers count everything -- not just the
        # length of the data but also the lengths of the lengths, including
        # the 'K ' or 'V ' prefixes.
        #
        # The reason there are both Prop-content-length and Content-length
        # is that the former includes just props, while the latter includes
        # everything.  That's the generic header form for any entity in a
        # dumpfile.  But since revisions only have props, the two lengths
        # are always the same for revisions.

        # Calculate the output needed for the property definitions.
        all_prop_strings = self._string_for_props(revprops)
        total_len = len(all_prop_strings)

        # Print the revision header and revprops
        self.dumpfile.write(
            'Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            '\n'
            % (self.revision, total_len, total_len, all_prop_strings)
        )

    def end_commit(self):
        """Do nothing; no record is written at the end of a commit."""

        pass

    def _make_any_dir(self, path):
        """Emit the creation of directory PATH."""

        self.dumpfile.write(
            "Node-path: %s\n"
            "Node-kind: dir\n"
            "Node-action: add\n"
            "\n"
            "\n"
            % self._utf8_path(path)
        )

    def _write_dir_props(self, dir_path, prop_contents):
        """Emit a 'change' record for directory DIR_PATH.

        PROP_CONTENTS is the complete property section (including its
        terminating PROPS-END line) to write for the directory."""

        prop_len = len(prop_contents)

        # A directory record has no text, so the property length and the
        # total content length are the same.
        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: change\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            % (self._utf8_path(dir_path), prop_len, prop_len, prop_contents)
        )

    def _register_basic_directory(self, path, create):
        """Register the creation of PATH if it is not already there.

        Create any parent directories that do not already exist.  If
        CREATE is set, also create PATH if it doesn't already exist.  This
        method should only be used for the LOD paths and the directories
        containing them, not for directories within an LOD path."""

        if path not in self._basic_directories:
            # Make sure that the parent directory is present:
            self._register_basic_directory(path_split(path)[0], True)
            if create:
                self._make_any_dir(path)
            self._basic_directories.add(path)

    def initialize_project(self, project):
        """Create any initial directories for the project.

        The trunk, tags, and branches directories are created the first
        time the project is seen.  Be sure not to create parent
        directories that already exist (e.g., because two directories
        share part of their paths either within or across projects)."""

        for path in project.get_initial_directories():
            self._register_basic_directory(path, True)

    def initialize_lod(self, lod):
        """Create the directory for LOD, unless its path is empty."""

        lod_path = lod.get_path()
        if lod_path:
            self._register_basic_directory(lod_path, True)

    def mkdir(self, lod, cvs_directory):
        """Emit the creation of CVS_DIRECTORY within LOD."""

        self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

    def _add_or_change_path(self, cvs_rev, op):
        """Emit the addition or change corresponding to CVS_REV.

        OP is either the constant OP_ADD or OP_CHANGE.

        If the file is named '.cvsignore', its contents are first written
        as the svn:ignore property of the containing directory; the file
        itself is then emitted only if Ctx().keep_cvsignore is set."""

        assert op in [OP_ADD, OP_CHANGE]

        # The property handling here takes advantage of an undocumented
        # but IMHO consistent feature of the Subversion dumpfile-loading
        # code.  When a node's properties aren't mentioned (that is, the
        # "Prop-content-length:" header is absent, no properties are
        # listed at all, and there is no "PROPS-END\n" line) then no
        # change is made to the node's properties.
        #
        # This is consistent with the way dumpfiles behave w.r.t. text
        # content changes, so I'm comfortable relying on it.  If you
        # commit a change to *just* the properties of some node that
        # already has text contents from a previous revision, then in the
        # dumpfile output for the prop change, no "Text-content-length:"
        # nor "Text-content-md5:" header will be present, and the text of
        # the file will not be given.  But this does not cause the file's
        # text to be erased!  It simply remains unchanged.
        #
        # This works out great for cvs2svn, due to lucky coincidences:
        #
        # For files, we set most properties in the first revision and
        # never change them.  (The only exception is the 'cvs2svn:cvs-rev'
        # property.)  If 'cvs2svn:cvs-rev' is not being used, then there
        # is no need to remember the full set of properties on a given
        # file once we've set it.
        #
        # For directories, the only property we set is "svn:ignore", and
        # while we may change it after the first revision, we always do so
        # based on the contents of a ".cvsignore" file -- in other words,
        # CVS is doing the remembering for us, so we still don't have to
        # preserve the previous value of the property ourselves.

        # Calculate the (sorted-by-name) property string and length, if any.
        svn_props = cvs_rev.get_properties()
        if cvs_rev.properties_changed:
            prop_contents = self._string_for_props(svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)
        else:
            prop_contents = ''
            props_header = ''

        data = self._revision_reader.get_content(cvs_rev)

        # Convert all EOLs to the style requested by svn:eol-style, if
        # necessary.
        eol_style = svn_props.get('svn:eol-style', None)
        if eol_style:
            eol = EOL_STYLE_REPLACEMENTS[eol_style]
            data = canonicalize_eol(data, eol)

        # treat .cvsignore as a directory property
        dir_path, basename = path_split(cvs_rev.get_svn_path())
        if basename == '.cvsignore':
            ignore_value = '\n'.join(generate_ignores(data))
            if ignore_value:
                ignore_value += '\n'
            ignore_props = (
                'K 10\nsvn:ignore\nV %d\n%s\n'
                % (len(ignore_value), ignore_value)
            )
            ignore_props += 'PROPS-END\n'

            self._write_dir_props(dir_path, ignore_props)

            if not Ctx().keep_cvsignore:
                return

        # The whole text is already in memory, so its length and checksum
        # can be computed before anything is written; there is no need to
        # write placeholder headers and seek back to patch them.
        checksum = md5()
        checksum.update(data)
        length = len(data)

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: file\n'
            'Node-action: %s\n'
            '%s'  # no property header if no props
            % (self._utf8_path(cvs_rev.get_svn_path()), op, props_header)
        )

        # The content length is the length of the property data plus the
        # text data.  The fixed-width fields are kept so that the emitted
        # bytes are unchanged from the earlier seek-and-patch
        # implementation.
        self.dumpfile.write(
            'Text-content-length: %16d\n'
            'Text-content-md5: %32s\n'
            'Content-length: %16d\n'
            '\n'
            % (length, checksum.hexdigest(), length + len(prop_contents),)
        )

        if prop_contents:
            self.dumpfile.write(prop_contents)

        self.dumpfile.write(data)

        # This record is done (write two newlines -- one to terminate
        # contents that weren't themselves newline-terminated, one to
        # provide a blank line for readability).
        self.dumpfile.write('\n\n')

    def add_path(self, cvs_rev):
        """Emit the addition corresponding to CVS_REV, a CVSRevisionAdd."""

        self._add_or_change_path(cvs_rev, OP_ADD)

    def change_path(self, cvs_rev):
        """Emit the change corresponding to CVS_REV, a CVSRevisionChange."""

        self._add_or_change_path(cvs_rev, OP_CHANGE)

    def delete_lod(self, lod):
        """Emit the deletion of LOD."""

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path()),)
        )
        # The directory has to be registered again before it is reused.
        self._basic_directories.remove(lod.get_path())

    def delete_path(self, lod, cvs_path):
        """Emit the deletion of CVS_PATH within LOD.

        If the path is a '.cvsignore' file, first clear the properties of
        the containing directory; the file deletion itself is then emitted
        only if Ctx().keep_cvsignore is set."""

        dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path()))
        if basename == '.cvsignore':
            # When a .cvsignore file is deleted, the directory's svn:ignore
            # property needs to be deleted.  An empty property section
            # does that.
            self._write_dir_props(dir_path, 'PROPS-END\n')

            if not Ctx().keep_cvsignore:
                return

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),)
        )

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Emit the copy of SRC_LOD, as of SRC_REVNUM, to DEST_LOD."""

        # Register the main LOD directory, and create parent directories
        # as needed:
        self._register_basic_directory(dest_lod.get_path(), False)

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (self._utf8_path(dest_lod.get_path()),
               src_revnum, self._utf8_path(src_lod.get_path()))
        )

    def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
        """Emit the copy of CVS_PATH from SRC_LOD, as of SRC_REVNUM, to
        DEST_LOD.

        CVS_PATH must be a CVSFile or a CVSDirectory; otherwise
        InternalError is raised.  A '.cvsignore' file is copied only if
        Ctx().keep_cvsignore is set."""

        if isinstance(cvs_path, CVSFile):
            node_kind = 'file'
            if cvs_path.basename == '.cvsignore':
                # FIXME: Here we have to adjust the containing directory's
                # svn:ignore property to reflect the addition of the
                # .cvsignore file to the LOD!  This is awkward because we
                # don't have the contents of the .cvsignore file available.
                if not Ctx().keep_cvsignore:
                    return
        elif isinstance(cvs_path, CVSDirectory):
            node_kind = 'dir'
        else:
            raise InternalError()

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (
                self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
                node_kind,
                src_revnum,
                self._utf8_path(src_lod.get_path(cvs_path.cvs_path)),
            )
        )

    def finish(self):
        """Perform any cleanup necessary after all revisions have been
        committed."""

        self.dumpfile.close()
def generate_ignores(raw_ignore_val):
    """Return the list of ignore patterns found in RAW_IGNORE_VAL.

    RAW_IGNORE_VAL is the text of a .cvsignore file.  It is split on
    whitespace, and each resulting word is one pattern.  A word
    consisting of a lone '!' discards all patterns seen before it."""

    ignore_vals = []
    for ignore in raw_ignore_val.split():
        # Reset the list if we encounter a '!'
        # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
        if ignore == '!':
            ignore_vals = []
        else:
            ignore_vals.append(ignore)
    return ignore_vals