Rename CVSPath.filename to rcs_path.
[cvs2svn.git] / cvs2svn_lib / repository_walker.py
blob963e2e06af05c656dc7a6b044febaffaaad9b7ec
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Walk through a CVS project, generating CVSPaths."""
20 import os
21 import stat
23 from cvs2svn_lib.common import FatalError
24 from cvs2svn_lib.common import warning_prefix
25 from cvs2svn_lib.common import IllegalSVNPathError
26 from cvs2svn_lib.log import logger
27 from cvs2svn_lib.context import Ctx
28 from cvs2svn_lib.project import FileInAndOutOfAtticException
29 from cvs2svn_lib.cvs_path import CVSDirectory
30 from cvs2svn_lib.cvs_path import CVSFile
class _RepositoryWalker(object):
    """Walk a CVS repository tree, producing CVSDirectory/CVSFile objects.

    The walker inspects the filesystem below a CVSDirectory's rcs_path.
    Files whose names end in ',v' become CVSFiles, subdirectories become
    CVSDirectories, and a subdirectory named 'Attic' gets special
    treatment (see the individual methods)."""

    def __init__(self, file_key_generator, error_handler):
        """Store the collaborators used during the walk.

        FILE_KEY_GENERATOR must provide a gen_id() method; it is called
        once for every CVSFile and CVSDirectory created by this class.

        ERROR_HANDLER is called with a single message string whenever a
        problem is found that should be reported without aborting the
        walk."""

        self.file_key_generator = file_key_generator
        self.error_handler = error_handler

    def _get_cvs_file(
            self, parent_directory, basename,
            file_in_attic=False, leave_in_attic=False
    ):
        """Return a CVSFile describing the file with name BASENAME.

        PARENT_DIRECTORY is the CVSDirectory instance describing the
        directory that physically holds this file in the filesystem.
        BASENAME must be the base name of a *,v file within
        PARENT_DIRECTORY.

        FILE_IN_ATTIC is a boolean telling whether the specified file
        is in an Attic subdirectory.  If FILE_IN_ATTIC is True, then:

        - If LEAVE_IN_ATTIC is True, PARENT_DIRECTORY itself (i.e., the
          Attic directory) remains the parent of the returned CVSFile.

        - Otherwise, the parent of the returned CVSFile is
          PARENT_DIRECTORY.parent_directory, and
          FileInAndOutOfAtticException is raised if a file with the
          same name exists in that directory.

        The CVSFile is assigned a new id from the file key generator.
        The file's size and executable bit are read with os.stat();
        the final two constructor arguments are passed as None.

        Raise FatalError if the resulting filename would not be legal
        in SVN."""

        rcs_filename = os.path.join(parent_directory.rcs_path, basename)

        # BASENAME ends with ',v'; the name used from here on omits it.
        stripped_basename = basename[:-2]

        try:
            Ctx().output_option.verify_filename_legal(stripped_basename)
        except IllegalSVNPathError as e:
            raise FatalError(
                'File %r would result in an illegal SVN filename: %s'
                % (rcs_filename, e,)
            )

        in_attic = bool(file_in_attic and not leave_in_attic)
        if in_attic:
            # The file is treated as if it lived one directory up.
            logical_parent_directory = parent_directory.parent_directory

            # If this file also exists outside of the attic, it's a
            # fatal error:
            non_attic_filename = os.path.join(
                logical_parent_directory.rcs_path, basename
            )
            if os.path.exists(non_attic_filename):
                raise FileInAndOutOfAtticException(
                    non_attic_filename, rcs_filename
                )
        else:
            logical_parent_directory = parent_directory

        file_stat = os.stat(rcs_filename)

        # mode is not known, so we temporarily set it to None.
        # NOTE(review): the meaning of the second trailing None is not
        # visible in this file; confirm against CVSFile's constructor.
        return CVSFile(
            self.file_key_generator.gen_id(),
            parent_directory.project,
            logical_parent_directory,
            stripped_basename,
            in_attic,
            # Whether or not the owner-executable bit is set:
            bool(file_stat.st_mode & stat.S_IXUSR),
            # The size of the file in bytes:
            file_stat.st_size,
            None,
            None
        )

    def _get_attic_file(self, parent_directory, basename):
        """Return a CVSFile object for the Attic file at BASENAME.

        PARENT_DIRECTORY is the CVSDirectory that physically contains
        the file on the filesystem (i.e., the Attic directory).  It is
        not necessarily the parent_directory of the CVSFile that will
        be returned.

        Normally the returned CVSFile's parent is
        PARENT_DIRECTORY.parent_directory.  If a file of the same name
        also exists there, the conflict is reported (as a warning if
        Ctx().retain_conflicting_attic_files is set, otherwise through
        the error handler) and the returned CVSFile keeps
        PARENT_DIRECTORY as its parent."""

        try:
            return self._get_cvs_file(
                parent_directory, basename, file_in_attic=True
            )
        except FileInAndOutOfAtticException as e:
            if Ctx().retain_conflicting_attic_files:
                logger.warn(
                    "%s: %s;\n"
                    " storing the latter into 'Attic' subdirectory.\n"
                    % (warning_prefix, e)
                )
            else:
                self.error_handler(str(e))

            # Either way, return a CVSFile object so that the rest of
            # the file processing can proceed:
            return self._get_cvs_file(
                parent_directory, basename,
                file_in_attic=True, leave_in_attic=True
            )

    def _generate_attic_cvs_files(self, cvs_directory):
        """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY.

        Directory entries are processed in sorted order.  Files whose
        logical parent is not CVS_DIRECTORY are yielded immediately.
        Files that are being retained in the Attic are collected and
        yielded at the end, preceded by CVS_DIRECTORY itself.

        Subdirectories are never descended into: one named '.svn' is
        noted at debug level, any other produces a warning.  Entries
        that are neither directories nor *,v files are skipped."""

        retained_attic_files = []
        attic_path = cvs_directory.rcs_path

        for fname in sorted(os.listdir(attic_path)):
            pathname = os.path.join(attic_path, fname)
            if os.path.isdir(pathname):
                message = (
                    "Directory %s found within Attic; ignoring" % (pathname,)
                )
                if fname == '.svn':
                    logger.debug(message)
                else:
                    logger.warn(message)
            elif fname.endswith(',v'):
                cvs_file = self._get_attic_file(cvs_directory, fname)
                if cvs_file.parent_directory == cvs_directory:
                    # This file will be retained in the Attic directory.
                    retained_attic_files.append(cvs_file)
                else:
                    # This is a normal Attic file, which is treated as
                    # if it were located one directory up:
                    yield cvs_file

        if retained_attic_files:
            # At least one file stays in the Attic.  Emit the Attic
            # directory itself first, then the retained files:
            yield cvs_directory
            for cvs_file in retained_attic_files:
                yield cvs_file

    def generate_cvs_paths(self, cvs_directory):
        """Generate the CVSPaths under non-Attic directory CVS_DIRECTORY.

        Yield CVS_DIRECTORY first, then the CVSFiles found directly in
        it (in sorted order), then whatever the 'Attic' subdirectory
        contributes, and finally, recursively, the contents of every
        other subdirectory.

        A conflict between a subdirectory name and the name of a file
        (from this directory or from its Attic) is reported through the
        error handler; the walk continues regardless.

        Subdirectories named '.svn' are ignored (noted at debug level),
        as these don't make much sense in a real conversion, but they
        are present in our test suite.

        Raise FatalError if a subdirectory name would not be legal in
        SVN."""

        yield cvs_directory

        directory_path = cvs_directory.rcs_path

        # Map {cvs_file.rcs_basename : cvs_file.rcs_path} for files
        # directly in cvs_directory:
        rcsfiles = {}

        # Name of the Attic subdirectory, if there is one:
        attic_dir = None

        # Non-Attic subdirectories of cvs_directory (to be recursed
        # into):
        dirs = []

        for fname in sorted(os.listdir(directory_path)):
            pathname = os.path.join(directory_path, fname)
            if os.path.isdir(pathname):
                if fname == 'Attic':
                    attic_dir = fname
                elif fname == '.svn':
                    logger.debug("Directory %s ignored" % (pathname,))
                else:
                    dirs.append(fname)
            elif fname.endswith(',v'):
                cvs_file = self._get_cvs_file(cvs_directory, fname)
                rcsfiles[cvs_file.rcs_basename] = cvs_file.rcs_path
                yield cvs_file
            # Anything else is silently ignored.

        # Map {cvs_file.rcs_basename : cvs_file.rcs_path} for files in
        # an Attic directory within cvs_directory:
        attic_rcsfiles = {}

        if attic_dir is not None:
            attic_directory = CVSDirectory(
                self.file_key_generator.gen_id(),
                cvs_directory.project, cvs_directory, 'Attic'
            )

            for cvs_path in self._generate_attic_cvs_files(attic_directory):
                # Only Attic files that logically live in cvs_directory
                # can clash with the names of its subdirectories.
                if isinstance(cvs_path, CVSFile) \
                        and cvs_path.parent_directory == cvs_directory:
                    attic_rcsfiles[cvs_path.rcs_basename] = cvs_path.rcs_path

                yield cvs_path

            alldirs = dirs + [attic_dir]
        else:
            alldirs = dirs

        # Check for conflicts between directory names and the filenames
        # that will result from the rcs files (both in this directory
        # and in attic).  (We recurse into the subdirectories
        # nevertheless, to try to detect more problems.)
        for fname in alldirs:
            for rcsfile_list in [rcsfiles, attic_rcsfiles]:
                if fname in rcsfile_list:
                    self.error_handler(
                        'Directory name conflicts with filename. '
                        'Please remove or rename one\n'
                        'of the following:\n'
                        ' "%s"\n'
                        ' "%s"' % (
                            os.path.join(directory_path, fname),
                            rcsfile_list[fname],
                        )
                    )

        # Now recurse into the other subdirectories:
        for fname in dirs:
            dirname = os.path.join(directory_path, fname)

            # Verify that the directory name does not contain any
            # illegal characters:
            try:
                Ctx().output_option.verify_filename_legal(fname)
            except IllegalSVNPathError as e:
                raise FatalError(
                    'Directory %r would result in an illegal SVN path name: %s'
                    % (dirname, e,)
                )

            sub_directory = CVSDirectory(
                self.file_key_generator.gen_id(),
                cvs_directory.project, cvs_directory, fname
            )

            for cvs_path in self.generate_cvs_paths(sub_directory):
                yield cvs_path
def walk_repository(project, file_key_generator, error_handler):
    """Yield the CVSDirectories and CVSFiles found within PROJECT.

    FILE_KEY_GENERATOR supplies the ids (via gen_id()) for the paths
    that are created.  ERROR_HANDLER is called with a string describing
    each fatal problem that is found; it is expected to record the
    error so that processing can continue through the end of the pass.

    Side effect: PROJECT.root_cvs_directory_id is set to the id of the
    CVSDirectory created for the project root.

    The walk itself is delegated to _RepositoryWalker, which:

    * Handles files in the Attic, marking the resulting CVSFile
      instances as coming from the Attic.

    * Checks for naming conflicts that will result from files in and
      out of the Attic.  If Ctx().retain_conflicting_attic_files is
      set, the conflict is resolved by leaving the Attic file in the
      attic; otherwise a fatal error is registered.

    * Checks for naming conflicts between files (in or out of the
      Attic) and directories.

    * Checks for filenames that contain characters not allowed by
      Subversion."""

    # The project root has no parent directory and an empty base name.
    root_directory = CVSDirectory(
        file_key_generator.gen_id(), project, None, ''
    )
    project.root_cvs_directory_id = root_directory.id

    walker = _RepositoryWalker(file_key_generator, error_handler)
    for found_path in walker.generate_cvs_paths(root_directory):
        yield found_path