Move the function canonicalize_eol() to common.py.
[cvs2svn.git] / cvs2svn_lib / dumpfile_delegate.py
blob0178eb78a6cd56ea989f432b228a48dffd425e92
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
"""This module contains the DumpfileDelegate class, which writes a Subversion dumpfile."""
20 try:
21 from hashlib import md5
22 except ImportError:
23 from md5 import new as md5
26 from cvs2svn_lib import config
27 from cvs2svn_lib.common import FatalError
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.common import canonicalize_eol
30 from cvs2svn_lib.common import path_split
31 from cvs2svn_lib.context import Ctx
32 from cvs2svn_lib.cvs_path import CVSDirectory
33 from cvs2svn_lib.cvs_path import CVSFile
34 from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
35 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single
# Things that can happen to a file.
OP_ADD = 'add'
OP_CHANGE = 'change'


# A mapping from the value of the svn:eol-style property to the EOL
# string that should appear in a dumpfile:
EOL_STYLE_REPLACEMENTS = {
    'LF': '\n',
    'CR': '\r',
    'CRLF': '\r\n',
    'native': '\n',
    }
class DumpfileDelegate(SVNRepositoryDelegate):
    """Create a Subversion dumpfile.

    The dumpfile is opened when the instance is constructed and every
    callback method appends one or more records to it.  Call finish()
    to close the file."""

    def __init__(self, revision_reader, dumpfile_path):
        """Open DUMPFILE_PATH for writing and emit the dumpfile header.

        REVISION_READER is the object whose get_content() method is used
        to obtain the text of each CVS revision.  Paths written to the
        dumpfile are decoded using Ctx().cvs_filename_decoder()."""

        self._revision_reader = revision_reader
        self.dumpfile_path = dumpfile_path

        self.dumpfile = open(self.dumpfile_path, 'wb')
        self._write_dumpfile_header(self.dumpfile)

        # A set of the basic project infrastructure project directories
        # that have been created so far, as SVN paths.  (The root
        # directory is considered to be present at initialization.)  This
        # includes all of the LOD paths, and all of their parent
        # directories etc.
        self._basic_directories = set([''])

    def _write_dumpfile_header(self, dumpfile):
        """Write the standard dumpfile header to DUMPFILE."""

        # Since the CVS repository doesn't have a UUID, and the Subversion
        # repository will be created with one anyway, we don't specify a
        # UUID in the dumpfile.
        dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    def _utf8_path(self, path):
        """Return a copy of PATH encoded in UTF-8.

        Raise FatalError if a component of PATH cannot be converted."""

        # Convert each path component separately (as they may each use
        # different encodings).
        try:
            return '/'.join([
                Ctx().cvs_filename_decoder(piece).encode('utf8')
                for piece in path.split('/')
                ])
        except UnicodeError:
            raise FatalError(
                "Unable to convert a path '%s' to internal encoding.\n"
                "Consider rerunning with one or more '--encoding' parameters or\n"
                "with '--fallback-encoding'."
                % (path,))

    @staticmethod
    def _string_for_props(properties):
        """Return PROPERTIES in the form needed for the dumpfile.

        Properties are emitted sorted by name.  Properties whose name
        starts with '_' (internal use only) and properties whose value
        is None (meaning: leave the property unset) are omitted.  The
        result always ends with a PROPS-END line."""

        prop_strings = [
            'K %d\n%s\nV %d\n%s\n' % (len(k), k, len(v), v)
            for (k, v) in sorted(properties.iteritems())
            if not k.startswith('_') and v is not None
            ]
        prop_strings.append('PROPS-END\n')

        return ''.join(prop_strings)

    def start_commit(self, revnum, revprops):
        """Emit the start of revision REVNUM with revision properties
        REVPROPS.

        REVNUM is also remembered as self.revision."""

        self.revision = revnum

        # The start of a new commit typically looks like this:
        #
        #   Revision-number: 1
        #   Prop-content-length: 129
        #   Content-length: 129
        #
        #   K 7
        #   svn:log
        #   V 27
        #   Log message for revision 1.
        #   K 10
        #   svn:author
        #   V 7
        #   jrandom
        #   K 8
        #   svn:date
        #   V 27
        #   2003-04-22T22:57:58.132837Z
        #   PROPS-END
        #
        # Notice that the length headers count everything -- not just the
        # length of the data but also the lengths of the lengths, including
        # the 'K ' or 'V ' prefixes.
        #
        # The reason there are both Prop-content-length and Content-length
        # is that the former includes just props, while the latter includes
        # everything.  That's the generic header form for any entity in a
        # dumpfile.  But since revisions only have props, the two lengths
        # are always the same for revisions.

        # Calculate the output needed for the property definitions.
        all_prop_strings = self._string_for_props(revprops)
        total_len = len(all_prop_strings)

        # Print the revision header and revprops
        self.dumpfile.write(
            'Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            '\n'
            % (self.revision, total_len, total_len, all_prop_strings)
            )

    def end_commit(self):
        """Do nothing; a revision needs no trailer in the dumpfile."""

        pass

    def _make_any_dir(self, path):
        """Emit the creation of directory PATH."""

        self.dumpfile.write(
            "Node-path: %s\n"
            "Node-kind: dir\n"
            "Node-action: add\n"
            "\n"
            "\n"
            % (self._utf8_path(path),)
            )

    def _change_dir_props(self, dir_path, prop_contents):
        """Emit a property-only change for the directory DIR_PATH.

        PROP_CONTENTS is the complete serialized property block,
        including its terminating PROPS-END line.  It is written
        verbatim after the node headers."""

        prop_len = len(prop_contents)

        # Only properties are written, so the two lengths are equal.
        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: change\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            % (self._utf8_path(dir_path), prop_len, prop_len, prop_contents)
            )

    def _register_basic_directory(self, path, create):
        """Register the creation of PATH if it is not already there.

        Create any parent directories that do not already exist.  If
        CREATE is set, also create PATH if it doesn't already exist.  This
        method should only be used for the LOD paths and the directories
        containing them, not for directories within an LOD path."""

        if path not in self._basic_directories:
            # Make sure that the parent directory is present:
            self._register_basic_directory(path_split(path)[0], True)
            if create:
                self._make_any_dir(path)
            self._basic_directories.add(path)

    def initialize_project(self, project):
        """Create any initial directories for the project.

        The trunk, tags, and branches directories are created the first
        time the project is seen.  Be sure not to create parent
        directories that already exist (e.g., because two directories
        share part of their paths either within or across projects)."""

        for path in project.get_initial_directories():
            self._register_basic_directory(path, True)

    def initialize_lod(self, lod):
        """Create the directory for LOD (and any missing parents), unless
        the path of LOD is empty."""

        lod_path = lod.get_path()
        if lod_path:
            self._register_basic_directory(lod_path, True)

    def mkdir(self, lod, cvs_directory):
        """Emit the creation of CVS_DIRECTORY within LOD."""

        self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

    def _add_or_change_path(self, cvs_rev, op):
        """Emit the addition or change corresponding to CVS_REV.

        OP is either the constant OP_ADD or OP_CHANGE.

        If the file is named '.cvsignore', first emit a change of the
        containing directory's svn:ignore property derived from the file
        contents; the file itself is then only emitted if
        Ctx().keep_cvsignore is set.

        A KeyError is raised if the svn:eol-style property of CVS_REV has
        a value that is not a key of EOL_STYLE_REPLACEMENTS."""

        assert op in [OP_ADD, OP_CHANGE]

        # The property handling here takes advantage of an undocumented
        # but IMHO consistent feature of the Subversion dumpfile-loading
        # code.  When a node's properties aren't mentioned (that is, the
        # "Prop-content-length:" header is absent, no properties are
        # listed at all, and there is no "PROPS-END\n" line) then no
        # change is made to the node's properties.
        #
        # This is consistent with the way dumpfiles behave w.r.t. text
        # content changes, so I'm comfortable relying on it.  If you
        # commit a change to *just* the properties of some node that
        # already has text contents from a previous revision, then in the
        # dumpfile output for the prop change, no "Text-content-length:"
        # nor "Text-content-md5:" header will be present, and the text of
        # the file will not be given.  But this does not cause the file's
        # text to be erased!  It simply remains unchanged.
        #
        # This works out great for cvs2svn, due to lucky coincidences:
        #
        # For files, we set most properties in the first revision and
        # never change them.  (The only exception is the 'cvs2svn:cvs-rev'
        # property.)  If 'cvs2svn:cvs-rev' is not being used, then there
        # is no need to remember the full set of properties on a given
        # file once we've set it.
        #
        # For directories, the only property we set is "svn:ignore", and
        # while we may change it after the first revision, we always do so
        # based on the contents of a ".cvsignore" file -- in other words,
        # CVS is doing the remembering for us, so we still don't have to
        # preserve the previous value of the property ourselves.

        # Calculate the (sorted-by-name) property string and length, if any.
        svn_props = cvs_rev.get_properties()
        if cvs_rev.properties_changed:
            prop_contents = self._string_for_props(svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)
        else:
            prop_contents = ''
            props_header = ''

        data = self._revision_reader.get_content(cvs_rev)

        # Convert all EOLs to the style named by svn:eol-style, if necessary
        eol_style = svn_props.get('svn:eol-style', None)
        if eol_style:
            eol = EOL_STYLE_REPLACEMENTS[eol_style]
            data = canonicalize_eol(data, eol)

        svn_path = cvs_rev.get_svn_path()

        # treat .cvsignore as a directory property
        dir_path, basename = path_split(svn_path)
        if basename == '.cvsignore':
            ignore_vals = generate_ignores(data)
            ignore_contents = '\n'.join(ignore_vals)
            if ignore_contents:
                ignore_contents += '\n'
            ignore_props = (
                'K 10\nsvn:ignore\nV %d\n%s\n'
                % (len(ignore_contents), ignore_contents)
                )
            ignore_props += 'PROPS-END\n'

            self._change_dir_props(dir_path, ignore_props)

            if not Ctx().keep_cvsignore:
                return

        # The whole text is already in memory, so its length and checksum
        # can be computed before anything is written; there is no need to
        # write placeholder headers and seek back to patch them.
        checksum = md5()
        checksum.update(data)
        length = len(data)

        # The content length is the length of the property data plus the
        # text data.  The numeric fields are space-padded to a fixed
        # width; that padding is part of the established output and is
        # kept unchanged.
        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: file\n'
            'Node-action: %s\n'
            '%s'  # no property header if no props
            'Text-content-length: %16d\n'
            'Text-content-md5: %32s\n'
            'Content-length: %16d\n'
            '\n'
            % (self._utf8_path(svn_path), op, props_header,
               length, checksum.hexdigest(), length + len(prop_contents))
            )

        if prop_contents:
            self.dumpfile.write(prop_contents)

        # Written separately so that the (possibly large) text is not
        # copied into a bigger string first.
        self.dumpfile.write(data)

        # This record is done (write two newlines -- one to terminate
        # contents that weren't themselves newline-terminated, one to
        # provide a blank line for readability).
        self.dumpfile.write('\n\n')

    def add_path(self, cvs_rev):
        """Emit the addition corresponding to CVS_REV, a CVSRevisionAdd."""

        self._add_or_change_path(cvs_rev, OP_ADD)

    def change_path(self, cvs_rev):
        """Emit the change corresponding to CVS_REV, a CVSRevisionChange."""

        self._add_or_change_path(cvs_rev, OP_CHANGE)

    def delete_lod(self, lod):
        """Emit the deletion of LOD.

        The path of LOD is also removed from the set of basic
        directories; a KeyError is raised if it was not registered."""

        lod_path = lod.get_path()
        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod_path),)
            )
        self._basic_directories.remove(lod_path)

    def delete_path(self, lod, cvs_path):
        """Emit the deletion of CVS_PATH from LOD.

        If the path is a '.cvsignore' file, also clear the properties of
        the containing directory; the file deletion itself is then only
        emitted if Ctx().keep_cvsignore is set."""

        svn_path = lod.get_path(cvs_path.cvs_path)
        dir_path, basename = path_split(svn_path)
        if basename == '.cvsignore':
            # When a .cvsignore file is deleted, the directory's svn:ignore
            # property needs to be deleted.
            self._change_dir_props(dir_path, 'PROPS-END\n')

            if not Ctx().keep_cvsignore:
                return

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(svn_path),)
            )

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Emit the copy of SRC_LOD at revision SRC_REVNUM to DEST_LOD."""

        dest_path = dest_lod.get_path()

        # Register the main LOD directory, and create parent directories
        # as needed.  The LOD directory itself is created by the copy
        # record below, hence create=False.
        self._register_basic_directory(dest_path, False)

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (self._utf8_path(dest_path),
               src_revnum, self._utf8_path(src_lod.get_path()))
            )

    def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
        """Emit the copy of CVS_PATH from SRC_LOD at revision SRC_REVNUM
        to DEST_LOD.

        CVS_PATH must be a CVSFile or a CVSDirectory; otherwise an
        InternalError is raised.  A '.cvsignore' file is not copied
        unless Ctx().keep_cvsignore is set."""

        if isinstance(cvs_path, CVSFile):
            node_kind = 'file'
            if cvs_path.basename == '.cvsignore':
                # FIXME: Here we have to adjust the containing directory's
                # svn:ignore property to reflect the addition of the
                # .cvsignore file to the LOD!  This is awkward because we
                # don't have the contents of the .cvsignore file available.
                if not Ctx().keep_cvsignore:
                    return
        elif isinstance(cvs_path, CVSDirectory):
            node_kind = 'dir'
        else:
            raise InternalError()

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (
                self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
                node_kind,
                src_revnum,
                self._utf8_path(src_lod.get_path(cvs_path.cvs_path))
                )
            )

    def finish(self):
        """Perform any cleanup necessary after all revisions have been
        committed."""

        self.dumpfile.close()
def generate_ignores(raw_ignore_val):
    """Return the list of ignore patterns described by RAW_IGNORE_VAL.

    RAW_IGNORE_VAL is the text of a .cvsignore file.  It is split on
    whitespace into patterns.  A pattern consisting of a lone '!'
    discards every pattern seen before it, so the result holds only the
    patterns that follow the last '!'."""

    patterns = []
    for token in raw_ignore_val.split():
        # Reset the list if we encounter a '!'
        # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
        if token == '!':
            patterns = []
        else:
            patterns.append(token)
    return patterns