* cvs2svn: Use gnu_getopt when available (Python >= 2.3) for more flexible
[cvs2svn.git] / cvs2svn_lib / dumpfile_delegate.py
blobaa6e0be536f1c6eaff1a7384066cd8457cb53076
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2006 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains the DumpfileDelegate class, which writes a Subversion dumpfile."""
20 import os
21 import md5
23 from boolean import *
24 import common
25 from common import FatalError
26 import config
27 from context import Ctx
28 from svn_repository_mirror import SVNRepositoryMirrorDelegate
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
    """Delegate that records each repository change in a Subversion
    dumpfile."""

    def __init__(self, dumpfile_path=None):
        """Open the dumpfile for writing and emit its format header.

        The file written is DUMPFILE_PATH, or Ctx().dumpfile when
        DUMPFILE_PATH is None (or otherwise false)."""

        self.dumpfile_path = dumpfile_path or Ctx().dumpfile
        self.dumpfile = open(self.dumpfile_path, 'wb')
        self._write_dumpfile_header(self.dumpfile)

    def _write_dumpfile_header(self, dumpfile):
        """Write the standard dumpfile format header to DUMPFILE."""

        # No UUID is written: the CVS repository doesn't have one, and
        # the Subversion repository will be created with its own anyway.
        dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    def _utf8_path(self, path):
        """Return a copy of PATH in which every '/'-separated component
        has been encoded in UTF-8.

        Raise FatalError if some component cannot be converted."""

        # Each component is converted on its own, because the components
        # may each use a different encoding.
        converted = []
        for component in path.split('/'):
            try:
                # Log messages can be converted with the 'replace'
                # strategy, but no lossiness is acceptable for paths.
                converted.append(Ctx().to_utf8(component, 'strict'))
            except UnicodeError:
                raise FatalError(
                    "Unable to convert a path '%s' to internal encoding.\n"
                    "Consider rerunning with one or more '--encoding' "
                    "parameters."
                    % (path,))
        return '/'.join(converted)

    def _string_for_prop(self, name, value):
        """Return the dumpfile representation of the property NAME with
        value VALUE: a 'K <length>' line, the name, a 'V <length>' line
        and the value."""

        fields = (len(name), name, len(value), value)
        return 'K %d\n%s\nV %d\n%s\n' % fields

    def _props_block(self, props):
        """Return the dumpfile property section for the mapping PROPS.

        Properties are listed in sorted name order, properties whose
        value is None are left out, and the section is terminated by a
        'PROPS-END' line."""

        names = props.keys()
        names.sort()
        entries = []
        for name in names:
            value = props[name]
            if value is not None:
                entries.append(self._string_for_prop(name, value))
        entries.append('PROPS-END\n')
        return ''.join(entries)

    def start_commit(self, svn_commit):
        """Emit the start of SVN_COMMIT (an SVNCommit)."""

        self.revision = svn_commit.revnum

        # A revision record typically looks like this:
        #
        #     Revision-number: 1
        #     Prop-content-length: 129
        #     Content-length: 129
        #
        #     K 7
        #     svn:log
        #     V 27
        #     Log message for revision 1.
        #     K 10
        #     svn:author
        #     V 7
        #     jrandom
        #     K 8
        #     svn:date
        #     V 27
        #     2003-04-22T22:57:58.132837Z
        #     PROPS-END
        #
        # The length headers count everything -- not just the length of
        # the data but also the lengths of the lengths, including the
        # 'K ' or 'V ' prefixes.
        #
        # Both Prop-content-length and Content-length are present
        # because the former covers just the props while the latter
        # covers everything; that is the generic header form for any
        # entity in a dumpfile.  Revisions only have props, so for them
        # the two lengths are always the same.
        revprops_text = self._props_block(svn_commit.get_revprops())
        revprops_len = len(revprops_text)

        # Revision header first, then the props, then a blank line.
        self.dumpfile.write('Revision-number: %d\n'
                            'Prop-content-length: %d\n'
                            'Content-length: %d\n'
                            '\n'
                            % (self.revision, revprops_len, revprops_len))
        self.dumpfile.write(revprops_text)
        self.dumpfile.write('\n')

    def mkdir(self, path):
        """Emit the creation of directory PATH."""

        node_path = self._utf8_path(path)
        self.dumpfile.write("Node-path: %s\n"
                            "Node-kind: dir\n"
                            "Node-action: add\n"
                            "\n"
                            "\n" % (node_path,))

    def _add_or_change_path(self, s_item, op):
        """Emit the addition or change corresponding to S_ITEM.

        OP is either the constant OP_ADD or OP_CHANGE; any other value
        raises FatalError.  FatalError is also raised if the command
        that produces the file contents exits with a nonzero status."""

        # Translate OP into the dumpfile's Node-action value.
        if op == common.OP_ADD:
            action = 'add'
        elif op == common.OP_CHANGE:
            action = 'change'
        else:
            raise FatalError("_add_or_change_path() called with bad op ('%s')"
                             % (op,))

        c_rev = s_item.c_rev

        # The property handling here relies on an undocumented but
        # consistent feature of the Subversion dumpfile-loading code:
        # when a node's properties aren't mentioned (no
        # "Prop-content-length:" header, no properties listed, and no
        # "PROPS-END\n" line), no change is made to the node's
        # properties.
        #
        # Text content behaves the same way.  A dumpfile record that
        # changes *just* the properties of a node carries no
        # "Text-content-length:" or "Text-content-md5:" header and no
        # text, yet the file's text is not erased; it simply remains
        # unchanged.
        #
        # This suits cvs2svn well:
        #
        # For files, the only properties we ever set are set in the
        # first revision; all other revisions (including on branches)
        # inherit from that, so there is no need to remember the full
        # set of properties on a file once it has been set.
        #
        # For directories, the only property we set is "svn:ignore".  It
        # may change after the first revision, but always based on the
        # contents of a ".cvsignore" file -- CVS does the remembering,
        # so the previous value need not be preserved here either.

        # Property section (sorted by name) and its header, if any.
        if s_item.svn_props_changed:
            prop_contents = self._props_block(s_item.svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)
        else:
            prop_contents = ''
            props_header = ''

        # A .cvsignore file is also recorded as the svn:ignore property
        # of the directory that contains it.
        dir_path, basename = os.path.split(c_rev.svn_path)
        if basename == ".cvsignore":
            ignore_value = '\n'.join(generate_ignores(c_rev))
            ignore_props = ('K 10\nsvn:ignore\nV %d\n%s\n'
                            'PROPS-END\n'
                            % (len(ignore_value), ignore_value))
            ignore_len = len(ignore_props)

            # Headers for the directory node, followed by its props.
            self.dumpfile.write('Node-path: %s\n'
                                'Node-kind: dir\n'
                                'Node-action: change\n'
                                'Prop-content-length: %d\n'
                                'Content-length: %d\n'
                                '\n'
                                '%s'
                                % (self._utf8_path(dir_path), ignore_len,
                                   ignore_len, ignore_props))

        # If the file has keywords, CVS/RCS must be prevented from
        # expanding them: they must be unexpanded in the repository, or
        # Subversion will get confused.
        pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
            c_rev, suppress_keyword_substitution=s_item.has_keywords)

        self.dumpfile.write('Node-path: %s\n'
                            'Node-kind: file\n'
                            'Node-action: %s\n'
                            '%s'  # no property header if no props
                            'Text-content-length: '
                            % (self._utf8_path(c_rev.svn_path),
                               action, props_header))

        # Remember where the placeholder values start; they are patched
        # once the real length and checksum are known.
        pos = self.dumpfile.tell()

        self.dumpfile.write('0000000000000000\n'
                            'Text-content-md5: '
                            '00000000000000000000000000000000\n'
                            'Content-length: 0000000000000000\n'
                            '\n')

        if prop_contents:
            self.dumpfile.write(prop_contents)

        # Insert a filter to convert all EOLs to LFs if necessary.
        data_reader = pipe.stdout
        if s_item.needs_eol_filter:
            data_reader = LF_EOL_Filter(data_reader)

        # Copy the revision contents, accumulating length and checksum
        # along the way.
        checksum = md5.new()
        length = 0
        buf = data_reader.read(config.PIPE_READ_SIZE)
        while buf != '':
            checksum.update(buf)
            length += len(buf)
            self.dumpfile.write(buf)
            buf = data_reader.read(config.PIPE_READ_SIZE)

        pipe.stdout.close()
        error_output = pipe.stderr.read()
        exit_status = pipe.wait()
        if exit_status:
            raise FatalError("The command '%s' failed with exit status: %s\n"
                             "and the following output:\n"
                             "%s" % (pipe_cmd, exit_status, error_output))

        # Offsets of the other two placeholders relative to POS:
        # 16 digits + 1 newline + len('Text-content-md5: ') == 35
        md5_pos = pos + 35
        # 35 + 32 bytes of checksum + 1 newline
        #    + len('Content-length: ') == 84
        content_length_pos = pos + 84

        # Go back and patch up the length and checksum headers.  The 16
        # zeros left for each length are replaced by the real value,
        # padded on the left with spaces.
        self.dumpfile.seek(pos, 0)
        self.dumpfile.write('%16d' % length)
        self.dumpfile.seek(md5_pos, 0)
        self.dumpfile.write(checksum.hexdigest())
        self.dumpfile.seek(content_length_pos, 0)
        # The content length is the length of the property data, the
        # text data, and any metadata around/inside them.
        self.dumpfile.write('%16d' % (length + len(prop_contents)))
        # Jump back to the end of the stream.
        self.dumpfile.seek(0, 2)

        # This record is done.  Write two newlines: one to terminate
        # contents that weren't themselves newline-terminated, one to
        # provide a blank line for readability.
        self.dumpfile.write('\n\n')

    def add_path(self, s_item):
        """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""

        self._add_or_change_path(s_item, common.OP_ADD)

    def change_path(self, s_item):
        """Emit the change corresponding to S_ITEM, an SVNCommitItem."""

        self._add_or_change_path(s_item, common.OP_CHANGE)

    def delete_path(self, path):
        """Emit the deletion of PATH."""

        node_path = self._utf8_path(path)
        self.dumpfile.write('Node-path: %s\n'
                            'Node-action: delete\n'
                            '\n' % (node_path,))

    def copy_path(self, src_path, dest_path, src_revnum):
        """Emit the copying of SRC_PATH at SRC_REVNUM to DEST_PATH."""

        # "Node-kind:" is not needed for copies; the loader ignores it
        # anyway and just uses the source kind instead.
        self.dumpfile.write('Node-path: %s\n'
                            'Node-action: add\n'
                            'Node-copyfrom-rev: %d\n'
                            'Node-copyfrom-path: /%s\n'
                            '\n'
                            % (self._utf8_path(dest_path),
                               src_revnum,
                               self._utf8_path(src_path)))

    def finish(self):
        """Close the dumpfile once all revisions have been committed."""

        self.dumpfile.close()
def generate_ignores(c_rev):
    """Return the list of ignore patterns found in the contents of C_REV.

    The contents are read through Ctx().cvs_repository.get_co_pipe().
    Patterns are separated by any whitespace; a lone '!' discards every
    pattern seen before it.  Raise FatalError if the command behind the
    pipe exits with a nonzero status."""

    # Read the whole file through the pipe.
    pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
    chunks = []
    while True:
        chunk = pipe.stdout.read(config.PIPE_READ_SIZE)
        if not chunk:
            break
        chunks.append(chunk)
    pipe.stdout.close()
    error_output = pipe.stderr.read()
    exit_status = pipe.wait()
    if exit_status:
        raise FatalError("The command '%s' failed with exit status: %s\n"
                         "and the following output:\n"
                         "%s" % (pipe_cmd, exit_status, error_output))

    # Splitting on whitespace yields one non-empty token per pattern, so
    # spaces and newlines are handled alike and blank lines vanish.
    patterns = []
    for token in ''.join(chunks).split():
        if token == '!':
            # A '!' resets the list; see
            # http://cvsbook.red-bean.com/cvsbook.html#cvsignore
            patterns = []
        else:
            patterns.append(token)
    return patterns
class LF_EOL_Filter:
    """Wrap a stream so that every end-of-line marker read from it
    (CRLF, CR or LF) comes out as a single LF."""

    def __init__(self, stream):
        """Wrap STREAM, an object with a read(size) method."""
        self.stream = stream
        # True while a CR that ended the previous chunk is being held
        # back until it is known whether an LF follows it.
        self.carry_cr = False
        # True once the underlying stream has returned an empty read.
        self.eof = False

    def read(self, size):
        """Read up to SIZE characters from the wrapped stream and return
        them with the line endings converted.

        An empty string is returned only at end of input.  The result
        can be one character longer than SIZE when a held-back CR is
        prepended to the chunk."""

        while True:
            chunk = self.stream.read(size)
            self.eof = not chunk

            # Re-attach the CR held back by the previous iteration.
            if self.carry_cr:
                self.carry_cr = False
                chunk = '\r' + chunk

            # A trailing CR may be the first half of a CRLF pair that
            # straddles two chunks, so keep it for the next iteration.
            if not self.eof and chunk.endswith('\r'):
                chunk = chunk[:-1]
                self.carry_cr = True

            chunk = chunk.replace('\r\n', '\n').replace('\r', '\n')
            if chunk or self.eof:
                return chunk