Update GCS' email address to the Debian one.
[cvs2svn.git] / cvs2svn_lib / repository_walker.py
blobf56131bd4d36ef06d2f7f9b7f25a99cb86680813
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Walk through a CVS project, generating CVSPaths."""
20 import os
21 import stat
23 from cvs2svn_lib.common import path_join
24 from cvs2svn_lib.common import FatalError
25 from cvs2svn_lib.common import warning_prefix
26 from cvs2svn_lib.common import IllegalSVNPathError
27 from cvs2svn_lib.log import logger
28 from cvs2svn_lib.context import Ctx
29 from cvs2svn_lib.project import FileInAndOutOfAtticException
30 from cvs2svn_lib.cvs_path import CVSDirectory
31 from cvs2svn_lib.cvs_path import CVSFile
class _RepositoryWalker(object):
    """Walk a CVS repository directory tree, generating CVSPaths.

    FILE_KEY_GENERATOR supplies the ids (via gen_id()) that are assigned
    to every CVSDirectory and CVSFile created by the walker.
    ERROR_HANDLER is called with a string describing each problem that
    is detected but that should not abort the walk immediately."""

    def __init__(self, file_key_generator, error_handler):
        self.file_key_generator = file_key_generator
        self.error_handler = error_handler

    def _get_cvs_file(self, parent_directory, basename,
                      file_in_attic=False, leave_in_attic=False):
        """Return a CVSFile describing the file with name BASENAME.

        PARENT_DIRECTORY is the CVSDirectory instance describing the
        directory that physically holds this file in the filesystem.
        BASENAME must be the base name of a *,v file within
        PARENT_DIRECTORY.

        FILE_IN_ATTIC is a boolean telling whether the specified file is
        in an Attic subdirectory.  If FILE_IN_ATTIC is True, then:

        - If LEAVE_IN_ATTIC is True, then leave the 'Attic' component in
          the filename.

        - Otherwise, raise FileInAndOutOfAtticException if a file with
          the same filename appears outside of Attic.

        The CVSFile is assigned a new unique id.  All of the CVSFile
        information is filled in except mode (which can only be
        determined by parsing the file).

        Raise FatalError if the resulting filename would not be legal in
        SVN."""

        filename = os.path.join(parent_directory.rcs_path, basename)

        # BASENAME ends with ',v' (see the docstring); drop that suffix
        # to get the name that is checked and stored in the CVSFile.
        stripped_basename = basename[:-2]

        try:
            Ctx().output_option.verify_filename_legal(stripped_basename)
        except IllegalSVNPathError as e:
            raise FatalError(
                'File %r would result in an illegal SVN filename: %s'
                % (filename, e,)
            )

        if file_in_attic and not leave_in_attic:
            in_attic = True
            # The file is treated as if it lived one directory up from
            # the Attic directory that physically contains it.
            logical_parent_directory = parent_directory.parent_directory

            # If this file also exists outside of the attic, it's a
            # fatal error:
            non_attic_filename = os.path.join(
                logical_parent_directory.rcs_path, basename,
            )
            if os.path.exists(non_attic_filename):
                raise FileInAndOutOfAtticException(
                    non_attic_filename, filename
                )
        else:
            in_attic = False
            logical_parent_directory = parent_directory

        file_stat = os.stat(filename)

        # The size of the file in bytes:
        file_size = file_stat.st_size

        # Whether or not the (owner) executable bit is set:
        file_executable = bool(file_stat.st_mode & stat.S_IXUSR)

        # mode is not known, so we temporarily set it to None.
        return CVSFile(
            self.file_key_generator.gen_id(),
            parent_directory.project, logical_parent_directory,
            stripped_basename,
            in_attic, file_executable, file_size, None, None
        )

    def _get_attic_file(self, parent_directory, basename):
        """Return a CVSFile object for the Attic file at BASENAME.

        PARENT_DIRECTORY is the CVSDirectory that physically contains
        the file on the filesystem (i.e., the Attic directory).  It is
        not necessarily the parent_directory of the CVSFile that will be
        returned.

        Return CVSFile, whose parent directory is usually
        PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY
        iff CVSFile will remain in the Attic directory.

        If the file exists both in and out of the Attic, either log a
        warning (when Ctx().retain_conflicting_attic_files is set) or
        report the problem through self.error_handler; in both cases a
        CVSFile that stays in the Attic is returned."""

        try:
            return self._get_cvs_file(
                parent_directory, basename, file_in_attic=True,
            )
        except FileInAndOutOfAtticException as e:
            if Ctx().retain_conflicting_attic_files:
                logger.warn(
                    "%s: %s;\n"
                    " storing the latter into 'Attic' subdirectory.\n"
                    % (warning_prefix, e)
                )
            else:
                self.error_handler(str(e))

        # Either way, return a CVSFile object so that the rest of the
        # file processing can proceed:
        return self._get_cvs_file(
            parent_directory, basename,
            file_in_attic=True, leave_in_attic=True,
        )

    def _generate_attic_cvs_files(self, cvs_directory, exclude_paths):
        """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY.

        Entries whose repository path is in EXCLUDE_PATHS are skipped
        (and logged).

        Also yield CVS_DIRECTORY if any files are being retained in the
        Attic.

        Silently ignore subdirectories named '.svn' or 'CVS', but emit a
        warning if any other directories are found within the Attic
        directory."""

        retained_attic_files = []

        # Process the entries in sorted order.
        for fname in sorted(os.listdir(cvs_directory.rcs_path)):
            pathname = os.path.join(cvs_directory.rcs_path, fname)
            path_in_repository = path_join(
                cvs_directory.get_cvs_path(), fname
            )
            if path_in_repository in exclude_paths:
                logger.normal(
                    "Excluding file from conversion: %s"
                    % (path_in_repository,)
                )
            elif os.path.isdir(pathname):
                message = (
                    "Directory %s found within Attic; ignoring"
                    % (pathname,)
                )
                if fname == '.svn' or fname == 'CVS':
                    logger.debug(message)
                else:
                    logger.warn(message)
            elif fname.endswith(',v'):
                cvs_file = self._get_attic_file(cvs_directory, fname)
                if cvs_file.parent_directory == cvs_directory:
                    # This file will be retained in the Attic directory.
                    retained_attic_files.append(cvs_file)
                else:
                    # This is a normal Attic file, which is treated as
                    # if it were located one directory up:
                    yield cvs_file

        if retained_attic_files:
            # There was at least one file in the attic that will be
            # retained in the attic.  First include the Attic directory
            # itself in the output, then the retained attic files:
            yield cvs_directory
            for cvs_file in retained_attic_files:
                yield cvs_file

    def generate_cvs_paths(self, cvs_directory, exclude_paths):
        """Generate the CVSPaths under non-Attic directory CVS_DIRECTORY.

        Yield CVSDirectory and CVSFile instances as they are found.
        Process directories recursively, including Attic directories.
        Also look for conflicts between the filenames that will result
        from files, attic files, and subdirectories; each conflict is
        reported through self.error_handler.

        Entries whose repository path is in EXCLUDE_PATHS are skipped
        (and logged).

        Silently ignore subdirectories named 'CVS', as these are used by
        CVS to store metadata that are irrelevant to the conversion.
        Silently ignore subdirectories named '.svn', as these don't make
        much sense in a real conversion, but they are present in our
        test suite.

        Raise FatalError if a subdirectory name would not be legal in
        SVN."""

        yield cvs_directory

        # Map {cvs_file.rcs_basename : cvs_file.rcs_path} for files
        # directly in cvs_directory:
        rcsfiles = {}

        attic_dir = None

        # Non-Attic subdirectories of cvs_directory (to be recursed
        # into):
        dirs = []

        for fname in sorted(os.listdir(cvs_directory.rcs_path)):
            pathname = os.path.join(cvs_directory.rcs_path, fname)
            path_in_repository = path_join(
                cvs_directory.get_cvs_path(), fname
            )
            if path_in_repository in exclude_paths:
                logger.normal(
                    "Excluding file from conversion: %s"
                    % (path_in_repository,)
                )
            elif os.path.isdir(pathname):
                if fname == 'Attic':
                    attic_dir = fname
                elif fname == '.svn' or fname == 'CVS':
                    logger.debug("Directory %s ignored" % (pathname,))
                else:
                    dirs.append(fname)
            elif fname.endswith(',v'):
                cvs_file = self._get_cvs_file(cvs_directory, fname)
                rcsfiles[cvs_file.rcs_basename] = cvs_file.rcs_path
                yield cvs_file
            # Any other kind of file is silently ignored.

        # Map {cvs_file.rcs_basename : cvs_file.rcs_path} for files in
        # an Attic directory within cvs_directory:
        attic_rcsfiles = {}

        if attic_dir is not None:
            attic_directory = CVSDirectory(
                self.file_key_generator.gen_id(),
                cvs_directory.project, cvs_directory, 'Attic',
            )

            attic_paths = self._generate_attic_cvs_files(
                attic_directory, exclude_paths
            )
            for cvs_path in attic_paths:
                # Only Attic files that logically live in cvs_directory
                # can clash with the names of its subdirectories.
                if isinstance(cvs_path, CVSFile) \
                        and cvs_path.parent_directory == cvs_directory:
                    attic_rcsfiles[cvs_path.rcs_basename] = cvs_path.rcs_path

                yield cvs_path

            alldirs = dirs + [attic_dir]
        else:
            alldirs = dirs

        # Check for conflicts between directory names and the filenames
        # that will result from the rcs files (both in this directory
        # and in attic).  (We recurse into the subdirectories
        # nevertheless, to try to detect more problems.)
        for fname in alldirs:
            for rcsfile_list in [rcsfiles, attic_rcsfiles]:
                if fname in rcsfile_list:
                    self.error_handler(
                        'Directory name conflicts with filename. Please remove or '
                        'rename one\n'
                        'of the following:\n'
                        ' "%s"\n'
                        ' "%s"' % (
                            os.path.join(cvs_directory.rcs_path, fname),
                            rcsfile_list[fname],
                        )
                    )

        # Now recurse into the other subdirectories:
        for fname in dirs:
            dirname = os.path.join(cvs_directory.rcs_path, fname)

            # Verify that the directory name does not contain any
            # illegal characters:
            try:
                Ctx().output_option.verify_filename_legal(fname)
            except IllegalSVNPathError as e:
                raise FatalError(
                    'Directory %r would result in an illegal SVN path name: %s'
                    % (dirname, e,)
                )

            sub_directory = CVSDirectory(
                self.file_key_generator.gen_id(),
                cvs_directory.project, cvs_directory, fname,
            )

            for cvs_path in self.generate_cvs_paths(
                    sub_directory, exclude_paths):
                yield cvs_path
def walk_repository(project, file_key_generator, error_handler):
    """Generate CVSDirectories and CVSFiles within PROJECT.

    Use FILE_KEY_GENERATOR to generate the IDs used for files.  If there
    is a fatal error, register it by calling ERROR_HANDLER with a string
    argument describing the problem.  (The error will be logged but
    processing will continue through the end of the pass.)  Also:

    * Set PROJECT.root_cvs_directory_id.

    * Handle files in the Attic by generating CVSFile instances with
      the _in_attic member set.

    * Check for naming conflicts that will result from files in and out
      of the Attic.  If Ctx().retain_conflicting_attic_files is set,
      fix the conflicts by leaving the Attic file in the attic.
      Otherwise, register a fatal error.

    * Check for naming conflicts between files (in or out of the Attic)
      and directories.

    * Check for filenames that contain characters not allowed by
      Subversion.

    """

    # The project root is a CVSDirectory with no parent and an empty
    # name; its id is recorded on the project before walking starts.
    root = CVSDirectory(file_key_generator.gen_id(), project, None, '')
    project.root_cvs_directory_id = root.id

    walker = _RepositoryWalker(file_key_generator, error_handler)
    found_paths = walker.generate_cvs_paths(root, project.exclude_paths)
    for found_path in found_paths:
        yield found_path