1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2006 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains database facilities used by cvs2svn."""
25 from common
import FatalError
27 from context
import Ctx
28 from svn_repository_mirror
import SVNRepositoryMirrorDelegate
31 class DumpfileDelegate(SVNRepositoryMirrorDelegate
):
32 """Create a Subversion dumpfile."""
def __init__(self, dumpfile_path=None):
    """Return a new DumpfileDelegate instance attached to a dumpfile.

    DUMPFILE_PATH is the path of the dumpfile to create.  If it is not
    given (None), Ctx().dumpfile is used instead.

    The file is opened for binary writing and the dumpfile header is
    written to it immediately.  The file stays open for the lifetime of
    the delegate and is stored as self.dumpfile.
    """
    # Only fall back to the configured default when the caller did not
    # name a file; an explicitly supplied path must not be overwritten.
    # An empty path can never be opened, so it is treated as "not given".
    if not dumpfile_path:
        dumpfile_path = Ctx().dumpfile
    self.dumpfile_path = dumpfile_path

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)
46 def _write_dumpfile_header(self
, dumpfile
):
47 # Initialize the dumpfile with the standard headers.
49 # Since the CVS repository doesn't have a UUID, and the Subversion
50 # repository will be created with one anyway, we don't specify a
51 # UUID in the dumpflie
52 dumpfile
.write('SVN-fs-dump-format-version: 2\n\n')
def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8.

    PATH is split on '/' and each component is converted separately
    with Ctx().to_utf8() (the components may each use different
    encodings), then the pieces are rejoined with '/'.

    Raises FatalError if a component cannot be converted.
    """
    converted = []
    for piece in path.split('/'):
        try:
            # Log messages can be converted with the 'replace' strategy,
            # but we can't afford any lossiness here.
            converted.append(Ctx().to_utf8(piece, 'strict'))
        except UnicodeError:
            # NOTE(review): assumes to_utf8() signals a failed 'strict'
            # conversion with UnicodeError (as the standard codecs do)
            # -- confirm against Ctx.to_utf8().
            raise FatalError(
                "Unable to convert a path '%s' to internal encoding.\n"
                "Consider rerunning with one or more '--encoding' parameters."
                % (path,))
    return '/'.join(converted)
72 def _string_for_prop(self
, name
, value
):
73 """Return a property in the form needed for the dumpfile."""
75 return 'K %d\n%s\nV %d\n%s\n' % (len(name
), name
, len(value
), value
)
77 def start_commit(self
, svn_commit
):
78 """Emit the start of SVN_COMMIT (an SVNCommit)."""
80 self
.revision
= svn_commit
.revnum
82 # The start of a new commit typically looks like this:
85 # Prop-content-length: 129
91 # Log message for revision 1.
99 # 2003-04-22T22:57:58.132837Z
102 # Notice that the length headers count everything -- not just the
103 # length of the data but also the lengths of the lengths, including
104 # the 'K ' or 'V ' prefixes.
106 # The reason there are both Prop-content-length and Content-length
107 # is that the former includes just props, while the latter includes
108 # everything. That's the generic header form for any entity in a
109 # dumpfile. But since revisions only have props, the two lengths
110 # are always the same for revisions.
112 # Calculate the output needed for the property definitions.
113 props
= svn_commit
.get_revprops()
114 prop_names
= props
.keys()
117 for propname
in prop_names
:
118 if props
[propname
] is not None:
119 prop_strings
.append(self
._string
_for
_prop
(propname
, props
[propname
]))
121 all_prop_strings
= ''.join(prop_strings
) + 'PROPS-END\n'
122 total_len
= len(all_prop_strings
)
124 # Print the revision header and props
125 self
.dumpfile
.write('Revision-number: %d\n'
126 'Prop-content-length: %d\n'
127 'Content-length: %d\n'
129 % (self
.revision
, total_len
, total_len
))
131 self
.dumpfile
.write(all_prop_strings
)
132 self
.dumpfile
.write('\n')
def mkdir(self, path):
    """Emit the creation of directory PATH.

    Writes to self.dumpfile a node record that adds PATH (converted
    with _utf8_path()) as a directory.  The record carries no property
    or content section.
    """
    self.dumpfile.write("Node-path: %s\n"
                        "Node-kind: dir\n"
                        "Node-action: add\n"
                        "\n"
                        "\n" % self._utf8_path(path))
143 def _add_or_change_path(self
, s_item
, op
):
144 """Emit the addition or change corresponding to S_ITEM.
145 OP is either the constant OP_ADD or OP_CHANGE."""
148 if op
== common
.OP_ADD
:
150 elif op
== common
.OP_CHANGE
:
153 raise FatalError("_add_or_change_path() called with bad op ('%s')"
156 # Convenience variables
159 # The property handling here takes advantage of an undocumented
160 # but IMHO consistent feature of the Subversion dumpfile-loading
161 # code. When a node's properties aren't mentioned (that is, the
162 # "Prop-content-length:" header is absent, no properties are
163 # listed at all, and there is no "PROPS-END\n" line) then no
164 # change is made to the node's properties.
166 # This is consistent with the way dumpfiles behave w.r.t. text
167 # content changes, so I'm comfortable relying on it. If you
168 # commit a change to *just* the properties of some node that
169 # already has text contents from a previous revision, then in the
170 # dumpfile output for the prop change, no "Text-content-length:"
171 # nor "Text-content-md5:" header will be present, and the text of
172 # the file will not be given. But this does not cause the file's
173 # text to be erased! It simply remains unchanged.
175 # This works out great for cvs2svn, due to lucky coincidences:
177 # For files, the only properties we ever set are set in the first
178 # revision; all other revisions (including on branches) inherit
179 # from that. After the first revision, we never change file
180 # properties, therefore, there is no need to remember the full set
181 # of properties on a given file once we've set it.
183 # For directories, the only property we set is "svn:ignore", and
184 # while we may change it after the first revision, we always do so
185 # based on the contents of a ".cvsignore" file -- in other words,
186 # CVS is doing the remembering for us, so we still don't have to
187 # preserve the previous value of the property ourselves.
189 # Calculate the (sorted-by-name) property string and length, if any.
190 if s_item
.svn_props_changed
:
191 svn_props
= s_item
.svn_props
193 prop_names
= svn_props
.keys()
195 for pname
in prop_names
:
196 pvalue
= svn_props
[pname
]
197 if pvalue
is not None:
198 prop_contents
+= self
._string
_for
_prop
(pname
, pvalue
)
199 prop_contents
+= 'PROPS-END\n'
200 props_header
= 'Prop-content-length: %d\n' % len(prop_contents
)
205 # treat .cvsignore as a directory property
206 dir_path
, basename
= os
.path
.split(c_rev
.svn_path
)
207 if basename
== ".cvsignore":
208 ignore_vals
= generate_ignores(c_rev
)
209 ignore_contents
= '\n'.join(ignore_vals
)
210 ignore_contents
= ('K 10\nsvn:ignore\nV %d\n%s\n' % \
211 (len(ignore_contents
), ignore_contents
))
212 ignore_contents
+= 'PROPS-END\n'
213 ignore_len
= len(ignore_contents
)
215 # write headers, then props
216 self
.dumpfile
.write('Node-path: %s\n'
218 'Node-action: change\n'
219 'Prop-content-length: %d\n'
220 'Content-length: %d\n'
223 % (self
._utf
8_path
(dir_path
), ignore_len
,
224 ignore_len
, ignore_contents
))
226 # If the file has keywords, we must prevent CVS/RCS from expanding
227 # the keywords because they must be unexpanded in the repository,
228 # or Subversion will get confused.
229 pipe_cmd
, pipe
= Ctx().cvs_repository
.get_co_pipe(
230 c_rev
, suppress_keyword_substitution
=s_item
.has_keywords
)
232 self
.dumpfile
.write('Node-path: %s\n'
235 '%s' # no property header if no props
236 'Text-content-length: '
237 % (self
._utf
8_path
(c_rev
.svn_path
),
238 action
, props_header
))
240 pos
= self
.dumpfile
.tell()
242 self
.dumpfile
.write('0000000000000000\n'
243 'Text-content-md5: 00000000000000000000000000000000\n'
244 'Content-length: 0000000000000000\n'
248 self
.dumpfile
.write(prop_contents
)
# Insert a filter to convert all EOLs to LFs if necessary
251 if s_item
.needs_eol_filter
:
252 data_reader
= LF_EOL_Filter(pipe
.stdout
)
254 data_reader
= pipe
.stdout
256 # Insert the rev contents, calculating length and checksum as we go.
260 buf
= data_reader
.read(config
.PIPE_READ_SIZE
)
265 self
.dumpfile
.write(buf
)
268 error_output
= pipe
.stderr
.read()
269 exit_status
= pipe
.wait()
271 raise FatalError("The command '%s' failed with exit status: %s\n"
272 "and the following output:\n"
273 "%s" % (pipe_cmd
, exit_status
, error_output
))
275 # Go back to patch up the length and checksum headers:
276 self
.dumpfile
.seek(pos
, 0)
277 # We left 16 zeros for the text length; replace them with the real
278 # length, padded on the left with spaces:
279 self
.dumpfile
.write('%16d' % length
)
280 # 16... + 1 newline + len('Text-content-md5: ') == 35
281 self
.dumpfile
.seek(pos
+ 35, 0)
282 self
.dumpfile
.write(checksum
.hexdigest())
283 # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
284 self
.dumpfile
.seek(pos
+ 84, 0)
# The content length is the length of property data, text data,
# and any metadata around or inside them.
287 self
.dumpfile
.write('%16d' % (length
+ len(prop_contents
)))
288 # Jump back to the end of the stream
289 self
.dumpfile
.seek(0, 2)
# This record is done (write two newlines -- one to terminate
# contents that weren't themselves newline-terminated, one to
# provide a blank line for readability).
294 self
.dumpfile
.write('\n\n')
def add_path(self, s_item):
    """Emit the addition of S_ITEM (an SVNCommitItem).

    Delegates to _add_or_change_path() with the OP_ADD operation.
    """
    operation = common.OP_ADD
    self._add_or_change_path(s_item, operation)
def change_path(self, s_item):
    """Emit the change of S_ITEM (an SVNCommitItem).

    Delegates to _add_or_change_path() with the OP_CHANGE operation.
    """
    operation = common.OP_CHANGE
    self._add_or_change_path(s_item, operation)
def delete_path(self, path):
    """Emit the deletion of PATH.

    Writes to self.dumpfile a node record with the 'delete' action for
    PATH (converted with _utf8_path()), followed by a blank line.
    """
    node_path = self._utf8_path(path)
    record = ('Node-path: %s\n'
              'Node-action: delete\n'
              '\n' % node_path)
    self.dumpfile.write(record)
def copy_path(self, src_path, dest_path, src_revnum):
    """Emit the copying of SRC_PATH at SRC_REVNUM to DEST_PATH.

    Writes to self.dumpfile a node record that adds DEST_PATH as a copy
    of SRC_PATH as it existed in revision SRC_REVNUM.  Both paths are
    converted with _utf8_path(); the copy source is written with a
    leading '/'.
    """
    # We don't need to include "Node-kind:" for copies; the loader
    # ignores it anyway and just uses the source kind instead.
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: add\n'
                        'Node-copyfrom-rev: %d\n'
                        'Node-copyfrom-path: /%s\n'
                        '\n'
                        % (self._utf8_path(dest_path),
                           src_revnum,
                           self._utf8_path(src_path)))
328 """Perform any cleanup necessary after all revisions have been
331 self
.dumpfile
.close()
334 def generate_ignores(c_rev
):
336 pipe_cmd
, pipe
= Ctx().cvs_repository
.get_co_pipe(c_rev
)
337 buf
= pipe
.stdout
.read(config
.PIPE_READ_SIZE
)
340 raw_ignore_val
+= buf
341 buf
= pipe
.stdout
.read(config
.PIPE_READ_SIZE
)
343 error_output
= pipe
.stderr
.read()
344 exit_status
= pipe
.wait()
346 raise FatalError("The command '%s' failed with exit status: %s\n"
347 "and the following output:\n"
348 "%s" % (pipe_cmd
, exit_status
, error_output
))
350 # Tweak props: First, convert any spaces to newlines...
351 raw_ignore_val
= '\n'.join(raw_ignore_val
.split())
352 raw_ignores
= raw_ignore_val
.split('\n')
354 for ignore
in raw_ignores
:
355 # Reset the list if we encounter a '!'
356 # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
363 ignore_vals
.append(ignore
)
368 """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
371 def __init__(self
, stream
):
373 self
.carry_cr
= False
376 def read(self
, size
):
378 buf
= self
.stream
.read(size
)
379 self
.eof
= len(buf
) == 0
382 self
.carry_cr
= False
383 if not self
.eof
and buf
[-1] == '\r':
386 buf
= buf
.replace('\r\n', '\n')
387 buf
= buf
.replace('\r', '\n')
388 if len(buf
) > 0 or self
.eof
: