1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains facilities for writing a Subversion dumpfile, used by cvs2svn."""
21 from hashlib
import md5
23 from md5
import new
as md5
26 from cvs2svn_lib
import config
27 from cvs2svn_lib
.common
import FatalError
28 from cvs2svn_lib
.common
import InternalError
29 from cvs2svn_lib
.common
import path_split
30 from cvs2svn_lib
.context
import Ctx
31 from cvs2svn_lib
.cvs_path
import CVSDirectory
32 from cvs2svn_lib
.cvs_path
import CVSFile
33 from cvs2svn_lib
.svn_repository_delegate
import SVNRepositoryDelegate
34 from cvs2svn_lib
.apple_single_filter
import get_maybe_apple_single_stream
37 # Things that can happen to a file.
42 class DumpfileDelegate(SVNRepositoryDelegate
):
43 """Create a Subversion dumpfile."""
45 def __init__(self
, revision_reader
, dumpfile_path
):
46 """Return a new DumpfileDelegate instance, attached to a dumpfile
47 DUMPFILE_PATH, using Ctx().cvs_filename_decoder()."""
49 self
._revision
_reader
= revision_reader
50 self
.dumpfile_path
= dumpfile_path
52 self
.dumpfile
= open(self
.dumpfile_path
, 'wb')
53 self
._write
_dumpfile
_header
(self
.dumpfile
)
55 # A set of the basic project infrastructure project directories
56 # that have been created so far, as SVN paths. (The root
57 # directory is considered to be present at initialization.) This
58 # includes all of the LOD paths, and all of their parent
60 self
._basic
_directories
= set([''])
62 def _write_dumpfile_header(self
, dumpfile
):
63 # Initialize the dumpfile with the standard headers.
65 # Since the CVS repository doesn't have a UUID, and the Subversion
66 # repository will be created with one anyway, we don't specify a
67 # UUID in the dumpflie
68 dumpfile
.write('SVN-fs-dump-format-version: 2\n\n')
70 def _utf8_path(self
, path
):
71 """Return a copy of PATH encoded in UTF-8."""
73 # Convert each path component separately (as they may each use
74 # different encodings).
77 Ctx().cvs_filename_decoder(piece
).encode('utf8')
78 for piece
in path
.split('/')
82 "Unable to convert a path '%s' to internal encoding.\n"
83 "Consider rerunning with one or more '--encoding' parameters or\n"
84 "with '--fallback-encoding'."
87 def _string_for_prop(self
, name
, value
):
88 """Return a property in the form needed for the dumpfile."""
90 return 'K %d\n%s\nV %d\n%s\n' % (len(name
), name
, len(value
), value
)
92 def start_commit(self
, revnum
, revprops
):
93 """Emit the start of SVN_COMMIT (an SVNCommit)."""
95 self
.revision
= revnum
97 # The start of a new commit typically looks like this:
100 # Prop-content-length: 129
101 # Content-length: 129
106 # Log message for revision 1.
114 # 2003-04-22T22:57:58.132837Z
117 # Notice that the length headers count everything -- not just the
118 # length of the data but also the lengths of the lengths, including
119 # the 'K ' or 'V ' prefixes.
121 # The reason there are both Prop-content-length and Content-length
122 # is that the former includes just props, while the latter includes
123 # everything. That's the generic header form for any entity in a
124 # dumpfile. But since revisions only have props, the two lengths
125 # are always the same for revisions.
127 # Calculate the output needed for the property definitions.
128 prop_names
= revprops
.keys()
131 for propname
in prop_names
:
132 if revprops
[propname
] is not None:
134 self
._string
_for
_prop
(propname
, revprops
[propname
]))
136 all_prop_strings
= ''.join(prop_strings
) + 'PROPS-END\n'
137 total_len
= len(all_prop_strings
)
139 # Print the revision header and revprops
141 'Revision-number: %d\n'
142 'Prop-content-length: %d\n'
143 'Content-length: %d\n'
147 % (self
.revision
, total_len
, total_len
, all_prop_strings
)
150 def end_commit(self
):
153 def _make_any_dir(self
, path
):
154 """Emit the creation of directory PATH."""
162 % self
._utf
8_path
(path
)
165 def _register_basic_directory(self
, path
, create
):
166 """Register the creation of PATH if it is not already there.
168 Create any parent directories that do not already exist. If
169 CREATE is set, also create PATH if it doesn't already exist. This
170 method should only be used for the LOD paths and the directories
171 containing them, not for directories within an LOD path."""
173 if path
not in self
._basic
_directories
:
174 # Make sure that the parent directory is present:
175 self
._register
_basic
_directory
(path_split(path
)[0], True)
177 self
._make
_any
_dir
(path
)
178 self
._basic
_directories
.add(path
)
def initialize_project(self, project):
    """Create any initial directories for PROJECT.

    Every path returned by PROJECT.get_initial_directories() (the trunk,
    tags, and branches directories) is registered via
    _register_basic_directory() with CREATE set to True.  That method
    only acts on paths that are not already registered, so parent
    directories shared within or across projects are not created
    twice."""
    initial_dirs = project.get_initial_directories()
    for initial_dir in initial_dirs:
        self._register_basic_directory(initial_dir, True)
191 def initialize_lod(self
, lod
):
192 lod_path
= lod
.get_path()
194 self
._register
_basic
_directory
(lod_path
, True)
def mkdir(self, lod, cvs_directory):
    """Emit the creation of the directory for CVS_DIRECTORY within LOD.

    The SVN path is obtained from LOD.get_path() using the directory's
    cvs_path and handed to _make_any_dir()."""
    svn_path = lod.get_path(cvs_directory.cvs_path)
    self._make_any_dir(svn_path)
199 def _add_or_change_path(self
, cvs_rev
, op
):
200 """Emit the addition or change corresponding to CVS_REV.
202 OP is either the constant OP_ADD or OP_CHANGE."""
204 assert op
in [OP_ADD
, OP_CHANGE
]
206 # The property handling here takes advantage of an undocumented
207 # but IMHO consistent feature of the Subversion dumpfile-loading
208 # code. When a node's properties aren't mentioned (that is, the
209 # "Prop-content-length:" header is absent, no properties are
210 # listed at all, and there is no "PROPS-END\n" line) then no
211 # change is made to the node's properties.
213 # This is consistent with the way dumpfiles behave w.r.t. text
214 # content changes, so I'm comfortable relying on it. If you
215 # commit a change to *just* the properties of some node that
216 # already has text contents from a previous revision, then in the
217 # dumpfile output for the prop change, no "Text-content-length:"
218 # nor "Text-content-md5:" header will be present, and the text of
219 # the file will not be given. But this does not cause the file's
220 # text to be erased! It simply remains unchanged.
222 # This works out great for cvs2svn, due to lucky coincidences:
224 # For files, we set most properties in the first revision and
225 # never change them. (The only exception is the 'cvs2svn:cvs-rev'
226 # property.) If 'cvs2svn:cvs-rev' is not being used, then there
227 # is no need to remember the full set of properties on a given
228 # file once we've set it.
230 # For directories, the only property we set is "svn:ignore", and
231 # while we may change it after the first revision, we always do so
232 # based on the contents of a ".cvsignore" file -- in other words,
233 # CVS is doing the remembering for us, so we still don't have to
234 # preserve the previous value of the property ourselves.
236 # Calculate the (sorted-by-name) property string and length, if any.
237 svn_props
= cvs_rev
.get_properties()
238 if cvs_rev
.properties_changed
:
240 prop_names
= svn_props
.keys()
242 for pname
in prop_names
:
243 pvalue
= svn_props
[pname
]
244 prop_contents
+= self
._string
_for
_prop
(pname
, pvalue
)
245 prop_contents
+= 'PROPS-END\n'
246 props_header
= 'Prop-content-length: %d\n' % len(prop_contents
)
251 # If the file has keywords, we must prevent CVS/RCS from expanding
252 # the keywords because they must be unexpanded in the repository,
253 # or Subversion will get confused.
254 has_keywords
= bool(cvs_rev
.get_properties().get('svn:keywords', None))
255 stream
= self
._revision
_reader
.get_content_stream(
256 cvs_rev
, suppress_keyword_substitution
=has_keywords
259 if Ctx().decode_apple_single
:
260 # Insert a filter to decode any files that are in AppleSingle
262 stream
= get_maybe_apple_single_stream(stream
)
264 # Insert a filter to convert all EOLs to LFs if necessary
266 eol_style
= svn_props
.get('svn:eol-style', None)
268 stream
= LF_EOL_Filter(stream
, eol_style
)
272 # treat .cvsignore as a directory property
273 dir_path
, basename
= path_split(cvs_rev
.get_svn_path())
274 if basename
== '.cvsignore':
276 ignore_vals
= generate_ignores(buf
)
277 ignore_contents
= '\n'.join(ignore_vals
)
279 ignore_contents
+= '\n'
280 ignore_contents
= ('K 10\nsvn:ignore\nV %d\n%s\n' % \
281 (len(ignore_contents
), ignore_contents
))
282 ignore_contents
+= 'PROPS-END\n'
283 ignore_len
= len(ignore_contents
)
285 # write headers, then props
289 'Node-action: change\n'
290 'Prop-content-length: %d\n'
291 'Content-length: %d\n'
294 % (self
._utf
8_path
(dir_path
),
295 ignore_len
, ignore_len
, ignore_contents
)
297 if not Ctx().keep_cvsignore
:
305 '%s' # no property header if no props
306 % (self
._utf
8_path
(cvs_rev
.get_svn_path()), op
, props_header
)
309 pos
= self
.dumpfile
.tell()
311 content_header_fmt
= (
312 'Text-content-length: %16d\n'
313 'Text-content-md5: %32s\n'
314 'Content-length: %16d\n'
318 self
.dumpfile
.write(content_header_fmt
% (0, '', 0,))
321 self
.dumpfile
.write(prop_contents
)
323 # Insert the rev contents, calculating length and checksum.
328 buf
= buf
+ stream
.read()
333 self
.dumpfile
.write(buf
)
335 # Go back to overwrite the length and checksum headers with the
336 # correct values. The content length is the length of property
337 # data, text data, and any metadata around/inside them:
338 self
.dumpfile
.seek(pos
, 0)
341 % (length
, checksum
.hexdigest(), length
+ len(prop_contents
),)
344 # Jump back to the end of the stream
345 self
.dumpfile
.seek(0, 2)
347 # This record is done (write two newlines -- one to terminate
348 # contents that weren't themselves newline-terminated, one to
349 # provide a blank line for readability).
350 self
.dumpfile
.write('\n\n')
def add_path(self, cvs_rev):
    """Emit the addition described by CVS_REV, a CVSRevisionAdd.

    Delegates to _add_or_change_path() with the OP_ADD operation."""
    operation = OP_ADD
    self._add_or_change_path(cvs_rev, operation)
def change_path(self, cvs_rev):
    """Emit the change described by CVS_REV, a CVSRevisionChange.

    Delegates to _add_or_change_path() with the OP_CHANGE operation."""
    operation = OP_CHANGE
    self._add_or_change_path(cvs_rev, operation)
362 def delete_lod(self
, lod
):
363 """Emit the deletion of LOD."""
367 'Node-action: delete\n'
369 % (self
._utf
8_path
(lod
.get_path()),)
371 self
._basic
_directories
.remove(lod
.get_path())
373 def delete_path(self
, lod
, cvs_path
):
374 dir_path
, basename
= path_split(lod
.get_path(cvs_path
.get_cvs_path()))
375 if basename
== '.cvsignore':
376 # When a .cvsignore file is deleted, the directory's svn:ignore
377 # property needs to be deleted.
378 ignore_contents
= 'PROPS-END\n'
379 ignore_len
= len(ignore_contents
)
381 # write headers, then props
385 'Node-action: change\n'
386 'Prop-content-length: %d\n'
387 'Content-length: %d\n'
390 % (self
._utf
8_path
(dir_path
),
391 ignore_len
, ignore_len
, ignore_contents
)
393 if not Ctx().keep_cvsignore
:
398 'Node-action: delete\n'
400 % (self
._utf
8_path
(lod
.get_path(cvs_path
.cvs_path
)),)
403 def copy_lod(self
, src_lod
, dest_lod
, src_revnum
):
404 # Register the main LOD directory, and create parent directories
406 self
._register
_basic
_directory
(dest_lod
.get_path(), False)
412 'Node-copyfrom-rev: %d\n'
413 'Node-copyfrom-path: %s\n'
415 % (self
._utf
8_path
(dest_lod
.get_path()),
416 src_revnum
, self
._utf
8_path
(src_lod
.get_path()))
419 def copy_path(self
, cvs_path
, src_lod
, dest_lod
, src_revnum
):
420 if isinstance(cvs_path
, CVSFile
):
422 if cvs_path
.basename
== '.cvsignore':
423 # FIXME: Here we have to adjust the containing directory's
424 # svn:ignore property to reflect the addition of the
425 # .cvsignore file to the LOD! This is awkward because we
426 # don't have the contents of the .cvsignore file available.
427 if not Ctx().keep_cvsignore
:
429 elif isinstance(cvs_path
, CVSDirectory
):
432 raise InternalError()
438 'Node-copyfrom-rev: %d\n'
439 'Node-copyfrom-path: %s\n'
442 self
._utf
8_path
(dest_lod
.get_path(cvs_path
.cvs_path
)),
445 self
._utf
8_path
(src_lod
.get_path(cvs_path
.cvs_path
))
450 """Perform any cleanup necessary after all revisions have been
453 self
.dumpfile
.close()
456 def generate_ignores(raw_ignore_val
):
458 for ignore
in raw_ignore_val
.split():
459 # Reset the list if we encounter a '!'
460 # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
464 ignore_vals
.append(ignore
)
469 """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
470 into the appropriate canonical eol style."""
472 eol_style_replacements
= {
479 def __init__(self
, stream
, eol_style
):
481 self
.replacement
= self
.eol_style_replacements
[eol_style
]
482 self
.carry_cr
= False
485 def read(self
, size
=-1):
487 buf
= self
.stream
.read(size
)
488 self
.eof
= len(buf
) == 0
491 self
.carry_cr
= False
492 if not self
.eof
and buf
[-1] == '\r':
495 buf
= buf
.replace('\r\n', '\n')
496 buf
= buf
.replace('\r', '\n')
497 if self
.replacement
!= '\n':
498 buf
= buf
.replace('\n', self
.replacement
)