Extract functions generate_edits_from_blocks() and write_edits().
[cvs2svn.git] / cvs2svn_lib / dumpfile_delegate.py
blob0c257755ad814b15dac9b1280a4dc7c25acb3054
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains the DumpfileDelegate class, which writes a Subversion dumpfile."""
20 try:
21 from hashlib import md5
22 except ImportError:
23 from md5 import new as md5
26 from cvs2svn_lib import config
27 from cvs2svn_lib.common import FatalError
28 from cvs2svn_lib.common import InternalError
29 from cvs2svn_lib.common import path_split
30 from cvs2svn_lib.context import Ctx
31 from cvs2svn_lib.cvs_path import CVSDirectory
32 from cvs2svn_lib.cvs_path import CVSFile
33 from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
34 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single_stream
# The operations that can be applied to a file node.  The values are
# written verbatim as the 'Node-action' of the node in the dumpfile.
OP_ADD = 'add'
OP_CHANGE = 'change'
class DumpfileDelegate(SVNRepositoryDelegate):
  """Create a Subversion dumpfile.

  Each callback writes the corresponding revision or node record
  directly to the dumpfile that is opened by the constructor; finish()
  closes that file."""

  def __init__(self, revision_reader, dumpfile_path):
    """Return a new DumpfileDelegate instance, attached to a dumpfile
    DUMPFILE_PATH, using Ctx().cvs_filename_decoder().

    REVISION_READER is the object whose get_content_stream() method is
    used to obtain the contents of each file revision that is written."""

    self._revision_reader = revision_reader
    self.dumpfile_path = dumpfile_path

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)

    # A set of the basic project infrastructure project directories
    # that have been created so far, as SVN paths.  (The root
    # directory is considered to be present at initialization.)  This
    # includes all of the LOD paths, and all of their parent
    # directories etc.
    self._basic_directories = set([''])

  def _write_dumpfile_header(self, dumpfile):
    """Write the standard dumpfile headers to DUMPFILE."""

    # Since the CVS repository doesn't have a UUID, and the Subversion
    # repository will be created with one anyway, we don't specify a
    # UUID in the dumpfile.
    dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

  def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8.

    Raise FatalError if any component of PATH cannot be converted."""

    # Convert each path component separately (as they may each use
    # different encodings).
    try:
      return '/'.join([
          Ctx().cvs_filename_decoder(piece).encode('utf8')
          for piece in path.split('/')
          ])
    except UnicodeError:
      raise FatalError(
          "Unable to convert a path '%s' to internal encoding.\n"
          "Consider rerunning with one or more '--encoding' parameters or\n"
          "with '--fallback-encoding'."
          % (path,))

  def _string_for_prop(self, name, value):
    """Return a property in the form needed for the dumpfile."""

    return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)

  def _format_props(self, props):
    """Return the dumpfile property block for the mapping PROPS.

    The properties are emitted sorted by name, properties whose value
    is None are skipped, and the block is terminated by a 'PROPS-END'
    line."""

    prop_strings = [
        self._string_for_prop(name, props[name])
        for name in sorted(props.keys())
        if props[name] is not None
        ]
    return ''.join(prop_strings) + 'PROPS-END\n'

  def _write_dir_props_change(self, dir_path, prop_contents):
    """Emit a node that replaces the properties of directory DIR_PATH.

    PROP_CONTENTS is the complete property block, including the
    terminating 'PROPS-END' line."""

    prop_len = len(prop_contents)

    # write headers, then props
    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-kind: dir\n'
        'Node-action: change\n'
        'Prop-content-length: %d\n'
        'Content-length: %d\n'
        '\n'
        '%s'
        % (self._utf8_path(dir_path), prop_len, prop_len, prop_contents)
        )

  def start_commit(self, revnum, revprops):
    """Emit the start of revision REVNUM.

    REVPROPS maps revision property names to their values; properties
    whose value is None are not written."""

    self.revision = revnum

    # The start of a new commit typically looks like this:
    #
    #   Revision-number: 1
    #   Prop-content-length: 129
    #   Content-length: 129
    #
    #   K 7
    #   svn:log
    #   V 27
    #   Log message for revision 1.
    #   K 10
    #   svn:author
    #   V 7
    #   jrandom
    #   K 8
    #   svn:date
    #   V 27
    #   2003-04-22T22:57:58.132837Z
    #   PROPS-END
    #
    # Notice that the length headers count everything -- not just the
    # length of the data but also the lengths of the lengths, including
    # the 'K ' or 'V ' prefixes.
    #
    # The reason there are both Prop-content-length and Content-length
    # is that the former includes just props, while the latter includes
    # everything.  That's the generic header form for any entity in a
    # dumpfile.  But since revisions only have props, the two lengths
    # are always the same for revisions.

    # Calculate the output needed for the property definitions.
    all_prop_strings = self._format_props(revprops)
    total_len = len(all_prop_strings)

    # Print the revision header and revprops
    self.dumpfile.write(
        'Revision-number: %d\n'
        'Prop-content-length: %d\n'
        'Content-length: %d\n'
        '\n'
        '%s'
        '\n'
        % (self.revision, total_len, total_len, all_prop_strings)
        )

  def end_commit(self):
    """Do nothing; a revision needs no terminator in the dumpfile."""

    pass

  def _make_any_dir(self, path):
    """Emit the creation of directory PATH."""

    self.dumpfile.write(
        "Node-path: %s\n"
        "Node-kind: dir\n"
        "Node-action: add\n"
        "\n"
        "\n"
        % self._utf8_path(path)
        )

  def _register_basic_directory(self, path, create):
    """Register the creation of PATH if it is not already there.

    Create any parent directories that do not already exist.  If
    CREATE is set, also create PATH if it doesn't already exist.  This
    method should only be used for the LOD paths and the directories
    containing them, not for directories within an LOD path."""

    if path not in self._basic_directories:
      # Make sure that the parent directory is present:
      self._register_basic_directory(path_split(path)[0], True)
      if create:
        self._make_any_dir(path)
      self._basic_directories.add(path)

  def initialize_project(self, project):
    """Create any initial directories for the project.

    The trunk, tags, and branches directories are created the first
    time the project is seen.  Be sure not to create parent
    directories that already exist (e.g., because two directories
    share part of their paths either within or across projects)."""

    for path in project.get_initial_directories():
      self._register_basic_directory(path, True)

  def initialize_lod(self, lod):
    """Create the directory for LOD (and any missing parents).

    Nothing is emitted if the path of LOD is empty."""

    lod_path = lod.get_path()
    if lod_path:
      self._register_basic_directory(lod_path, True)

  def mkdir(self, lod, cvs_directory):
    """Emit the creation of CVS_DIRECTORY within LOD."""

    self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.

    OP is either the constant OP_ADD or OP_CHANGE.

    If the file is named '.cvsignore', its contents are first written
    as the 'svn:ignore' property of the containing directory; the file
    node itself is then only written if Ctx().keep_cvsignore is set."""

    assert op in [OP_ADD, OP_CHANGE]

    # Convenience variables
    cvs_rev = s_item.cvs_rev

    # The property handling here takes advantage of an undocumented
    # but IMHO consistent feature of the Subversion dumpfile-loading
    # code.  When a node's properties aren't mentioned (that is, the
    # "Prop-content-length:" header is absent, no properties are
    # listed at all, and there is no "PROPS-END\n" line) then no
    # change is made to the node's properties.
    #
    # This is consistent with the way dumpfiles behave w.r.t. text
    # content changes, so I'm comfortable relying on it.  If you
    # commit a change to *just* the properties of some node that
    # already has text contents from a previous revision, then in the
    # dumpfile output for the prop change, no "Text-content-length:"
    # nor "Text-content-md5:" header will be present, and the text of
    # the file will not be given.  But this does not cause the file's
    # text to be erased!  It simply remains unchanged.
    #
    # This works out great for cvs2svn, due to lucky coincidences:
    #
    # For files, the only properties we ever set are set in the first
    # revision; all other revisions (including on branches) inherit
    # from that.  After the first revision, we never change file
    # properties, therefore, there is no need to remember the full set
    # of properties on a given file once we've set it.
    #
    # For directories, the only property we set is "svn:ignore", and
    # while we may change it after the first revision, we always do so
    # based on the contents of a ".cvsignore" file -- in other words,
    # CVS is doing the remembering for us, so we still don't have to
    # preserve the previous value of the property ourselves.

    # Calculate the (sorted-by-name) property string and length, if any.
    if s_item.svn_props_changed:
      prop_contents = self._format_props(s_item.svn_props)
      props_header = 'Prop-content-length: %d\n' % len(prop_contents)
    else:
      prop_contents = ''
      props_header = ''

    # If the file has keywords, we must prevent CVS/RCS from expanding
    # the keywords because they must be unexpanded in the repository,
    # or Subversion will get confused.
    stream = self._revision_reader.get_content_stream(
        cvs_rev, suppress_keyword_substitution=s_item.has_keywords()
        )

    # From here on the stream is closed exactly once, by the finally
    # clause, no matter whether we return early, finish reading, or
    # fail part way through.
    try:
      if Ctx().decode_apple_single:
        # Insert a filter to decode any files that are in AppleSingle
        # format:
        stream = get_maybe_apple_single_stream(stream)

      # Insert a filter to convert all EOLs to the canonical form of
      # the file's svn:eol-style, if necessary:
      eol_style = s_item.svn_props.get('svn:eol-style', None)
      if eol_style:
        stream = LF_EOL_Filter(stream, eol_style)

      buf = None

      # treat .cvsignore as a directory property
      dir_path, basename = path_split(cvs_rev.get_svn_path())
      if basename == '.cvsignore':
        # The whole file is needed to compute the property; keep it in
        # BUF so that it can also be written as the file text below.
        buf = stream.read()
        ignore_contents = '\n'.join(generate_ignores(buf))
        if ignore_contents:
          ignore_contents += '\n'
        self._write_dir_props_change(
            dir_path,
            self._string_for_prop('svn:ignore', ignore_contents)
            + 'PROPS-END\n'
            )
        if not Ctx().keep_cvsignore:
          return

      self.dumpfile.write(
          'Node-path: %s\n'
          'Node-kind: file\n'
          'Node-action: %s\n'
          '%s'  # no property header if no props
          % (self._utf8_path(cvs_rev.get_svn_path()), op, props_header)
          )

      # Remember where the content headers start.  The length and
      # checksum are not known until the text has been read, so
      # fixed-width placeholders are written now and overwritten
      # later.
      pos = self.dumpfile.tell()

      content_header_fmt = (
          'Text-content-length: %16d\n'
          'Text-content-md5: %32s\n'
          'Content-length: %16d\n'
          '\n'
          )

      self.dumpfile.write(content_header_fmt % (0, '', 0,))

      if prop_contents:
        self.dumpfile.write(prop_contents)

      # Insert the rev contents, calculating length and checksum as we go.
      checksum = md5()
      length = 0
      if buf is None:
        buf = stream.read(config.PIPE_READ_SIZE)
      while buf:
        checksum.update(buf)
        length += len(buf)
        self.dumpfile.write(buf)
        buf = stream.read(config.PIPE_READ_SIZE)
    finally:
      stream.close()

    # Go back to overwrite the length and checksum headers with the
    # correct values.  The content length is the length of property
    # data, text data, and any metadata around/inside them:
    self.dumpfile.seek(pos, 0)
    self.dumpfile.write(
        content_header_fmt
        % (length, checksum.hexdigest(), length + len(prop_contents),)
        )

    # Jump back to the end of the stream
    self.dumpfile.seek(0, 2)

    # This record is done (write two newlines -- one to terminate
    # contents that weren't themselves newline-terminated, one to
    # provide a blank line for readability).
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""

    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change corresponding to S_ITEM, an SVNCommitItem."""

    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_lod(self, lod):
    """Emit the deletion of LOD."""

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-action: delete\n'
        '\n'
        % (self._utf8_path(lod.get_path()),)
        )
    self._basic_directories.remove(lod.get_path())

  def delete_path(self, lod, cvs_path):
    """Emit the deletion of CVS_PATH within LOD.

    If the path is a '.cvsignore' file, the properties of the
    containing directory are cleared first; the deletion of the file
    itself is then only emitted if Ctx().keep_cvsignore is set."""

    # NOTE(review): the original used cvs_path.get_cvs_path() in one
    # place and cvs_path.cvs_path in the other; the attribute form is
    # used here, as in the rest of this class, on the assumption that
    # the two are equivalent.
    svn_path = lod.get_path(cvs_path.cvs_path)
    dir_path, basename = path_split(svn_path)
    if basename == '.cvsignore':
      # When a .cvsignore file is deleted, the directory's svn:ignore
      # property needs to be deleted.
      self._write_dir_props_change(dir_path, 'PROPS-END\n')
      if not Ctx().keep_cvsignore:
        return

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-action: delete\n'
        '\n'
        % (self._utf8_path(svn_path),)
        )

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Emit the copy of SRC_LOD as of revision SRC_REVNUM to DEST_LOD."""

    # Register the main LOD directory, and create parent directories
    # as needed:
    self._register_basic_directory(dest_lod.get_path(), False)

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-kind: dir\n'
        'Node-action: add\n'
        'Node-copyfrom-rev: %d\n'
        'Node-copyfrom-path: %s\n'
        '\n'
        % (self._utf8_path(dest_lod.get_path()),
           src_revnum, self._utf8_path(src_lod.get_path()))
        )

  def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
    """Emit the copy of CVS_PATH from SRC_LOD (as of revision
    SRC_REVNUM) to DEST_LOD.

    CVS_PATH must be a CVSFile or a CVSDirectory; otherwise
    InternalError is raised.  A '.cvsignore' file is not copied unless
    Ctx().keep_cvsignore is set."""

    if isinstance(cvs_path, CVSFile):
      node_kind = 'file'
      if cvs_path.basename == '.cvsignore':
        # FIXME: Here we have to adjust the containing directory's
        # svn:ignore property to reflect the addition of the
        # .cvsignore file to the LOD!  This is awkward because we
        # don't have the contents of the .cvsignore file available.
        if not Ctx().keep_cvsignore:
          return
    elif isinstance(cvs_path, CVSDirectory):
      node_kind = 'dir'
    else:
      raise InternalError()

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-kind: %s\n'
        'Node-action: add\n'
        'Node-copyfrom-rev: %d\n'
        'Node-copyfrom-path: %s\n'
        '\n'
        % (
            self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
            node_kind,
            src_revnum,
            self._utf8_path(src_lod.get_path(cvs_path.cvs_path))
            )
        )

  def finish(self):
    """Perform any cleanup necessary after all revisions have been
    committed."""

    self.dumpfile.close()
def generate_ignores(raw_ignore_val):
  """Return the list of ignore patterns found in RAW_IGNORE_VAL.

  RAW_IGNORE_VAL is the text of a .cvsignore file; patterns are
  separated by whitespace.  A lone '!' discards every pattern that
  precedes it, so only the patterns after the last '!' are returned.
  See http://cvsbook.red-bean.com/cvsbook.html#cvsignore"""

  tokens = raw_ignore_val.split()

  # Walk backwards to find the last reset marker, if there is one:
  for index in range(len(tokens) - 1, -1, -1):
    if tokens[index] == '!':
      return tokens[index + 1:]

  return tokens
class LF_EOL_Filter:
  """Wrap a stream so that every end-of-line marker read from it
  (CRLF, CR or LF) comes out in a single canonical eol style."""

  # The character sequence written for each supported eol style:
  eol_style_replacements = {
      'LF' : '\n',
      'CR' : '\r',
      'CRLF' : '\r\n',
      'native' : '\n',
      }

  def __init__(self, stream, eol_style):
    """Filter STREAM, converting its line endings to EOL_STYLE.

    EOL_STYLE must be one of the keys of eol_style_replacements."""

    self.stream = stream
    self.replacement = self.eol_style_replacements[eol_style]
    self.carry_cr = False
    self.eof = False

  def read(self, size=-1):
    """Read from the wrapped stream (passing SIZE on to it) and return
    the data with its line endings converted.

    An empty string is returned only once the wrapped stream is
    exhausted."""

    while True:
      chunk = self.stream.read(size)
      self.eof = len(chunk) == 0

      # Put back a CR that was withheld from the previous chunk:
      if self.carry_cr:
        self.carry_cr = False
        chunk = '\r' + chunk

      # A trailing CR might be the first half of a CRLF that is split
      # across two reads, so withhold it until more data has been seen:
      if not self.eof and chunk.endswith('\r'):
        chunk = chunk[:-1]
        self.carry_cr = True

      # Normalize everything to LF, then to the requested style:
      chunk = chunk.replace('\r\n', '\n').replace('\r', '\n')
      if self.replacement != '\n':
        chunk = chunk.replace('\n', self.replacement)

      # An empty chunk is only returned at end of file; otherwise (the
      # chunk consisted solely of a withheld CR) read some more.
      if self.eof or chunk:
        return chunk

  def close(self):
    """Close the wrapped stream and drop the reference to it."""

    self.stream.close()
    self.stream = None