Import svntest.main.run_tests explicitly.
[cvs2svn.git] / cvs2svn_lib / dumpfile_delegate.py
blobb5fe531b056a4fb3f44fb00146ce2fdcd1a83b5f
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2007 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains DumpfileDelegate, which writes a Subversion dumpfile."""
20 import md5
22 from cvs2svn_lib import config
23 from cvs2svn_lib.common import FatalError
24 from cvs2svn_lib.common import InternalError
25 from cvs2svn_lib.common import path_split
26 from cvs2svn_lib.context import Ctx
27 from cvs2svn_lib.cvs_file import CVSDirectory
28 from cvs2svn_lib.cvs_file import CVSFile
29 from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
30 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single_stream
33 # The node actions that _add_or_change_path() accepts as its OP argument.
34 OP_ADD = 'add'
35 OP_CHANGE = 'change'
class DumpfileDelegate(SVNRepositoryDelegate):
    """Create a Subversion dumpfile.

    The dumpfile is opened when the delegate is constructed.  Each
    callback appends the corresponding revision or node record to it,
    and finish() closes it."""

    def __init__(self, revision_reader, dumpfile_path):
        """Return a new DumpfileDelegate instance, attached to a dumpfile
        DUMPFILE_PATH, using Ctx().cvs_filename_decoder().

        REVISION_READER is the object whose get_content_stream() method
        is used to obtain the text of each file revision."""

        self._revision_reader = revision_reader
        self.dumpfile_path = dumpfile_path

        self.dumpfile = open(self.dumpfile_path, 'wb')
        self._write_dumpfile_header(self.dumpfile)

        # A set of the basic project infrastructure project directories
        # that have been created so far, as SVN paths.  (The root
        # directory is considered to be present at initialization.)  This
        # includes all of the LOD paths, and all of their parent
        # directories etc.
        self._basic_directories = set([''])

    def _write_dumpfile_header(self, dumpfile):
        """Initialize DUMPFILE with the standard headers."""

        # Since the CVS repository doesn't have a UUID, and the Subversion
        # repository will be created with one anyway, we don't specify a
        # UUID in the dumpfile.
        dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    def _utf8_path(self, path):
        """Return a copy of PATH encoded in UTF-8.

        Raise FatalError if any component of PATH cannot be converted."""

        # Convert each path component separately (as they may each use
        # different encodings).
        try:
            pieces = [
                Ctx().cvs_filename_decoder(piece).encode('utf8')
                for piece in path.split('/')
                ]
        except UnicodeError:
            raise FatalError(
                "Unable to convert a path '%s' to internal encoding.\n"
                "Consider rerunning with one or more '--encoding' parameters or\n"
                "with '--fallback-encoding'."
                % (path,))
        return '/'.join(pieces)

    def _string_for_prop(self, name, value):
        """Return a property in the form needed for the dumpfile."""

        return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)

    def _string_for_props(self, props):
        """Return the complete property block for the mapping PROPS.

        The properties are emitted sorted by name, properties whose
        value is None are skipped, and the block is terminated by a
        'PROPS-END' line."""

        prop_strings = [
            self._string_for_prop(name, props[name])
            for name in sorted(props.keys())
            if props[name] is not None
            ]
        prop_strings.append('PROPS-END\n')
        return ''.join(prop_strings)

    def _write_dir_props(self, dir_path, prop_contents):
        """Emit a 'change' record for directory DIR_PATH whose property
        block is PROP_CONTENTS (which must already end with PROPS-END)."""

        prop_len = len(prop_contents)

        # write headers, then props
        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: change\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            % (self._utf8_path(dir_path), prop_len, prop_len, prop_contents)
            )

    def start_commit(self, revnum, revprops):
        """Emit the start of SVN_COMMIT (an SVNCommit).

        REVNUM is the revision number and REVPROPS maps revision
        property names to values (None values are not written)."""

        self.revision = revnum

        # The start of a new commit typically looks like this:
        #
        #   Revision-number: 1
        #   Prop-content-length: 129
        #   Content-length: 129
        #
        #   K 7
        #   svn:log
        #   V 27
        #   Log message for revision 1.
        #   K 10
        #   svn:author
        #   V 7
        #   jrandom
        #   K 8
        #   svn:date
        #   V 27
        #   2003-04-22T22:57:58.132837Z
        #   PROPS-END
        #
        # Notice that the length headers count everything -- not just the
        # length of the data but also the lengths of the lengths, including
        # the 'K ' or 'V ' prefixes.
        #
        # The reason there are both Prop-content-length and Content-length
        # is that the former includes just props, while the latter includes
        # everything.  That's the generic header form for any entity in a
        # dumpfile.  But since revisions only have props, the two lengths
        # are always the same for revisions.

        # Calculate the output needed for the property definitions.
        all_prop_strings = self._string_for_props(revprops)
        total_len = len(all_prop_strings)

        # Print the revision header and revprops
        self.dumpfile.write(
            'Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            '\n'
            % (self.revision, total_len, total_len, all_prop_strings)
            )

    def end_commit(self):
        """Do nothing; a dumpfile revision needs no terminator."""

        pass

    def _make_any_dir(self, path):
        """Emit the creation of directory PATH."""

        self.dumpfile.write(
            "Node-path: %s\n"
            "Node-kind: dir\n"
            "Node-action: add\n"
            "\n"
            "\n"
            % self._utf8_path(path)
            )

    def _register_basic_directory(self, path, create):
        """Register the creation of PATH if it is not already there.

        Create any parent directories that do not already exist.  If
        CREATE is set, also create PATH if it doesn't already exist.  This
        method should only be used for the LOD paths and the directories
        containing them, not for directories within an LOD path."""

        if path not in self._basic_directories:
            # Make sure that the parent directory is present:
            self._register_basic_directory(path_split(path)[0], True)
            if create:
                self._make_any_dir(path)
            self._basic_directories.add(path)

    def initialize_project(self, project):
        """Create any initial directories for the project.

        The trunk, tags, and branches directories are created the first
        time the project is seen.  Be sure not to create parent
        directories that already exist (e.g., because two directories
        share part of their paths either within or across projects)."""

        for path in project.get_initial_directories():
            self._register_basic_directory(path, True)

    def initialize_lod(self, lod):
        """Create the directory for LOD, unless its path is empty."""

        lod_path = lod.get_path()
        if lod_path:
            self._register_basic_directory(lod_path, True)

    def mkdir(self, lod, cvs_directory):
        """Emit the creation of CVS_DIRECTORY within LOD."""

        self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

    def _add_or_change_path(self, s_item, op):
        """Emit the addition or change corresponding to S_ITEM.

        OP is either the constant OP_ADD or OP_CHANGE.

        If the file is named '.cvsignore', its contents are first written
        as the svn:ignore property of the containing directory; the file
        itself is then only written if Ctx().keep_cvsignore is set."""

        assert op in [OP_ADD, OP_CHANGE]

        # Convenience variables
        cvs_rev = s_item.cvs_rev

        # The property handling here takes advantage of an undocumented
        # but IMHO consistent feature of the Subversion dumpfile-loading
        # code.  When a node's properties aren't mentioned (that is, the
        # "Prop-content-length:" header is absent, no properties are
        # listed at all, and there is no "PROPS-END\n" line) then no
        # change is made to the node's properties.
        #
        # This is consistent with the way dumpfiles behave w.r.t. text
        # content changes, so I'm comfortable relying on it.  If you
        # commit a change to *just* the properties of some node that
        # already has text contents from a previous revision, then in the
        # dumpfile output for the prop change, no "Text-content-length:"
        # nor "Text-content-md5:" header will be present, and the text of
        # the file will not be given.  But this does not cause the file's
        # text to be erased!  It simply remains unchanged.
        #
        # This works out great for cvs2svn, due to lucky coincidences:
        #
        # For files, the only properties we ever set are set in the first
        # revision; all other revisions (including on branches) inherit
        # from that.  After the first revision, we never change file
        # properties, therefore, there is no need to remember the full set
        # of properties on a given file once we've set it.
        #
        # For directories, the only property we set is "svn:ignore", and
        # while we may change it after the first revision, we always do so
        # based on the contents of a ".cvsignore" file -- in other words,
        # CVS is doing the remembering for us, so we still don't have to
        # preserve the previous value of the property ourselves.

        # Calculate the (sorted-by-name) property string and length, if any.
        if s_item.svn_props_changed:
            prop_contents = self._string_for_props(s_item.svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)
        else:
            prop_contents = ''
            props_header = ''

        # If the file has keywords, we must prevent CVS/RCS from expanding
        # the keywords because they must be unexpanded in the repository,
        # or Subversion will get confused.
        stream = self._revision_reader.get_content_stream(
            cvs_rev, suppress_keyword_substitution=s_item.has_keywords()
            )

        # From here on the stream is open; make sure that it is closed
        # again on every exit path, including errors.
        try:
            if Ctx().decode_apple_single:
                # Insert a filter to decode any files that are in AppleSingle
                # format:
                stream = get_maybe_apple_single_stream(stream)

            # Insert a filter to convert all EOLs to the canonical form for
            # the file's svn:eol-style, if necessary.
            eol_style = s_item.svn_props.get('svn:eol-style', None)
            if eol_style:
                stream = LF_EOL_Filter(stream, eol_style)

            buf = None

            # treat .cvsignore as a directory property
            dir_path, basename = path_split(cvs_rev.get_svn_path())
            if basename == '.cvsignore':
                buf = stream.read()
                ignore_value = '\n'.join(generate_ignores(buf))
                if ignore_value:
                    ignore_value += '\n'
                self._write_dir_props(
                    dir_path,
                    self._string_for_prop('svn:ignore', ignore_value)
                    + 'PROPS-END\n'
                    )
                if not Ctx().keep_cvsignore:
                    return

            self.dumpfile.write(
                'Node-path: %s\n'
                'Node-kind: file\n'
                'Node-action: %s\n'
                '%s'  # no property header if no props
                % (self._utf8_path(cvs_rev.get_svn_path()), op, props_header)
                )

            # The length and checksum are not known until the content has
            # been streamed, so write fixed-width placeholders now and
            # remember where they are.
            pos = self.dumpfile.tell()

            content_header_fmt = (
                'Text-content-length: %16d\n'
                'Text-content-md5: %32s\n'
                'Content-length: %16d\n'
                '\n'
                )

            self.dumpfile.write(content_header_fmt % (0, '', 0,))

            if prop_contents:
                self.dumpfile.write(prop_contents)

            # Insert the rev contents, calculating length and checksum as
            # we go.  (For a .cvsignore file the whole text is already in
            # BUF.)
            checksum = md5.new()
            length = 0
            if buf is None:
                buf = stream.read(config.PIPE_READ_SIZE)
            while buf != '':
                checksum.update(buf)
                length += len(buf)
                self.dumpfile.write(buf)
                buf = stream.read(config.PIPE_READ_SIZE)
        finally:
            stream.close()

        # Go back to overwrite the length and checksum headers with the
        # correct values.  The content length is the length of property
        # data, text data, and any metadata around/inside them:
        self.dumpfile.seek(pos, 0)
        self.dumpfile.write(
            content_header_fmt
            % (length, checksum.hexdigest(), length + len(prop_contents),)
            )

        # Jump back to the end of the stream
        self.dumpfile.seek(0, 2)

        # This record is done (write two newlines -- one to terminate
        # contents that weren't themselves newline-terminated, one to
        # provide a blank line for readability).
        self.dumpfile.write('\n\n')

    def add_path(self, s_item):
        """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""

        self._add_or_change_path(s_item, OP_ADD)

    def change_path(self, s_item):
        """Emit the change corresponding to S_ITEM, an SVNCommitItem."""

        self._add_or_change_path(s_item, OP_CHANGE)

    def delete_lod(self, lod):
        """Emit the deletion of LOD."""

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path()),)
            )
        self._basic_directories.remove(lod.get_path())

    def delete_path(self, lod, cvs_path):
        """Emit the deletion of CVS_PATH within LOD.

        If the path is a '.cvsignore' file, the properties of the
        containing directory are cleared first, and the file deletion
        itself is only emitted if Ctx().keep_cvsignore is set."""

        dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path()))
        if basename == '.cvsignore':
            # When a .cvsignore file is deleted, the directory's svn:ignore
            # property needs to be deleted.
            self._write_dir_props(dir_path, 'PROPS-END\n')
            if not Ctx().keep_cvsignore:
                return

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),)
            )

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Emit the copy of SRC_LOD at revision SRC_REVNUM to DEST_LOD."""

        # Register the main LOD directory, and create parent directories
        # as needed:
        self._register_basic_directory(dest_lod.get_path(), False)

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (self._utf8_path(dest_lod.get_path()),
               src_revnum, self._utf8_path(src_lod.get_path()))
            )

    def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
        """Emit the copy of CVS_PATH from SRC_LOD at revision SRC_REVNUM
        to DEST_LOD.

        CVS_PATH must be a CVSFile or a CVSDirectory; otherwise
        InternalError is raised.  A '.cvsignore' file is only copied if
        Ctx().keep_cvsignore is set."""

        if isinstance(cvs_path, CVSFile):
            node_kind = 'file'
            if cvs_path.basename == '.cvsignore':
                # FIXME: Here we have to adjust the containing directory's
                # svn:ignore property to reflect the addition of the
                # .cvsignore file to the LOD!  This is awkward because we
                # don't have the contents of the .cvsignore file available.
                if not Ctx().keep_cvsignore:
                    return
        elif isinstance(cvs_path, CVSDirectory):
            node_kind = 'dir'
        else:
            raise InternalError()

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (
                self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
                node_kind,
                src_revnum,
                self._utf8_path(src_lod.get_path(cvs_path.cvs_path))
                )
            )

    def finish(self):
        """Perform any cleanup necessary after all revisions have been
        committed."""

        self.dumpfile.close()
def generate_ignores(raw_ignore_val):
    """Return the list of ignore patterns contained in RAW_IGNORE_VAL.

    RAW_IGNORE_VAL is the text of a .cvsignore file; patterns are
    separated by arbitrary whitespace.  A lone '!' clears every pattern
    listed before it (see
    http://cvsbook.red-bean.com/cvsbook.html#cvsignore), so the result
    is whatever follows the last '!'."""

    tokens = raw_ignore_val.split()
    # Scan backwards for the last reset marker; only the patterns after
    # it survive.
    for idx in range(len(tokens) - 1, -1, -1):
        if tokens[idx] == '!':
            return tokens[idx + 1:]
    return tokens
class LF_EOL_Filter:
    """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
    into the appropriate canonical eol style.

    The wrapped stream only needs read(size) and close() methods."""

    # The EOL sequence written for each supported svn:eol-style value.
    eol_style_replacements = {
        'LF': '\n',
        'CR': '\r',
        'CRLF': '\r\n',
        'native': '\n',
        }

    def __init__(self, stream, eol_style):
        """Wrap STREAM, converting its line endings to EOL_STYLE.

        EOL_STYLE must be a key of eol_style_replacements; any other
        value raises KeyError."""

        self.stream = stream
        self.replacement = self.eol_style_replacements[eol_style]
        # True if the previous chunk ended in a CR that is being held back
        # until we know whether an LF follows it.
        self.carry_cr = False
        # True once the underlying stream has returned an empty read.
        self.eof = False

    def read(self, size=-1):
        """Read up to SIZE characters from the wrapped stream and return
        them with the line endings converted.

        The returned string may be shorter or longer than what was read,
        and is empty only at end of file."""

        while True:
            chunk = self.stream.read(size)
            self.eof = not chunk

            # Put back the CR withheld from the previous chunk.
            if self.carry_cr:
                chunk = '\r' + chunk
                self.carry_cr = False

            # A trailing CR might be the first half of a CRLF pair that
            # straddles the chunk boundary, so hold it back -- unless this
            # is the end of the input.
            if not self.eof and chunk.endswith('\r'):
                self.carry_cr = True
                chunk = chunk[:-1]

            # Normalize everything to LF, then to the requested style.
            chunk = chunk.replace('\r\n', '\n').replace('\r', '\n')
            if self.replacement != '\n':
                chunk = chunk.replace('\n', self.replacement)

            # An empty result before EOF just means the whole chunk was a
            # withheld CR; read again rather than signalling EOF.
            if chunk or self.eof:
                return chunk

    def close(self):
        """Close the wrapped stream and drop the reference to it."""

        self.stream.close()
        self.stream = None