Add a method CVSPath.get_path_components().
[cvs2svn.git] / cvs2svn_lib / cvs_path.py
blob083c8c28f514c9fd8cf8d96769b503170cd9f8d4
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Classes that represent files and directories within CVS repositories."""
19 import os
21 from cvs2svn_lib.common import path_join
22 from cvs2svn_lib.context import Ctx
class CVSPath(object):
    """Represent a CVS file or directory.

    This is the common base of CVSDirectory and CVSFile.  Subclasses
    must implement get_path_components().

    Members:

      id -- (int) unique ID for this CVSPath.  At any moment, there is
          at most one CVSPath instance with a particular ID.  (This
          means that object identity is the same as object equality;
          __eq__(), __ne__() and __hash__() are therefore all
          identity-based, and instances can be used as map keys.)

      project -- (Project) the project containing this CVSPath.

      parent_directory -- (CVSDirectory or None) the CVSDirectory
          containing this CVSPath.

      rcs_basename -- (string) the base name of the filename path in
          the CVS repository corresponding to this CVSPath (but with
          ',v' removed for CVSFiles).  The rcs_basename of the root
          directory of a project is ''.

      rcs_path -- (string) the filesystem path to this CVSPath in the
          CVS repository.  This is in native format, and already
          normalised the way os.path.normpath() normalises paths.  It
          starts with the repository path passed to
          run_options.add_project() in the options.py file.

      ordinal -- (int) the order that this instance should be sorted
          relative to other CVSPath instances.  This member is set
          based on the ordering imposed by sort_key() by
          CVSPathDatabase after all CVSFiles have been processed.
          Comparisons of CVSPath using __cmp__() simply compare the
          ordinals.

    """

    __slots__ = [
        'id',
        'project',
        'parent_directory',
        'rcs_basename',
        'ordinal',
        'rcs_path',
        ]

    def __init__(self, id, project, parent_directory, rcs_basename):
        """Initialize the members shared by all CVSPaths.

        ordinal is deliberately not set here; see the class docstring.
        Subclasses must set up everything that get_path_components()
        reads before calling this method, because rcs_path is computed
        here."""

        self.id = id
        self.project = project
        self.parent_directory = parent_directory
        self.rcs_basename = rcs_basename

        # The rcs_path used to be computed on demand, but it turned out
        # to be a hot path through the code in some cases.  It's used by
        # SubtreeSymbolTransform and similar transforms, so it's called
        # at least:
        #
        #   (num_files * num_symbols_per_file
        #    * num_subtree_symbol_transforms)
        #
        # times.  On a large repository with several subtree symbol
        # transforms, that can exceed 100,000,000 calls.  And
        # _calculate_rcs_path() is quite complex, so doing that every
        # time could add about 10 minutes to the cvs2svn runtime.
        #
        # So now we precalculate this and just return it.
        self.rcs_path = os.path.normpath(self._calculate_rcs_path())

    def __getstate__(self):
        """Return the picklable state of this CVSPath.

        The project is stored by id only, and rcs_path is omitted
        because __setstate__() recomputes it.  This method must only be
        called after ordinal has been set."""

        return (
            self.id, self.project.id,
            self.parent_directory, self.rcs_basename,
            self.ordinal,
            )

    def __setstate__(self, state):
        """Restore the members from STATE, as produced by __getstate__().

        The project is looked up by id in Ctx()._projects, and rcs_path
        is recomputed rather than read from STATE."""

        (
            self.id, project_id,
            self.parent_directory, self.rcs_basename,
            self.ordinal,
            ) = state
        self.project = Ctx()._projects[project_id]
        self.rcs_path = os.path.normpath(self._calculate_rcs_path())

    def get_ancestry(self):
        """Return a list of the CVSPaths leading from the root to SELF.

        Return the CVSPaths in a list, starting with the path whose
        parent_directory is None (i.e.,
        self.project.get_root_cvs_directory()) and ending with self."""

        ancestry = []
        p = self
        while p is not None:
            ancestry.append(p)
            p = p.parent_directory

        # The chain was collected from SELF upwards; callers want it
        # root-first.
        ancestry.reverse()
        return ancestry

    def get_path_components(self, rcs=False):
        """Return the path components to this CVSPath.

        Return the components of this CVSPath's path, relative to the
        project's project_cvs_repos_path, as a list of strings.  If rcs
        is True, return the components of the filesystem path to the
        RCS file corresponding to this CVSPath (i.e., including any
        'Attic' component and trailing ',v').  If rcs is False, return
        the components of the logical CVS path name (i.e., including
        'Attic' only if the file is to be left in an Attic directory in
        the SVN repository and without trailing ',v').

        This base implementation always raises NotImplementedError;
        subclasses must override it."""

        raise NotImplementedError()

    def get_cvs_path(self):
        """Return the canonical path within the Project.

        The canonical path:

        - Uses forward slashes

        - Doesn't include ',v' for files

        - This doesn't include the 'Attic' segment of the path unless
          the file is to be left in an Attic directory in the SVN
          repository; i.e., if a filename exists in and out of Attic
          and the --retain-conflicting-attic-files option was
          specified.

        """

        # Reuse the shared helper rather than repeating its list
        # comprehension here.
        return path_join(*self._get_dir_components())

    cvs_path = property(get_cvs_path)

    def _get_dir_components(self):
        """Return a list of the components of the path leading to SELF.

        The return value contains the base names of all of the parent
        directories (except for the root directory) and SELF."""

        return [p.rcs_basename for p in self.get_ancestry()[1:]]

    def _calculate_rcs_path(self):
        """Return the filesystem path in the CVS repo for SELF.

        The result is not normalised; callers pass it through
        os.path.normpath()."""

        return os.path.join(
            self.project.project_cvs_repos_path,
            *self.get_path_components(rcs=True)
            )

    def __eq__(a, b):
        """Compare two CVSPath instances for equality.

        This method is supplied to avoid using __cmp__() for comparing
        for equality."""

        return a is b

    def __ne__(a, b):
        """Compare two CVSPath instances for inequality.

        Supplied alongside __eq__() so that '!=' is identity-based too
        and never falls back to __cmp__(), which requires ordinal to
        have been set."""

        return a is not b

    # Defining __eq__() disables the inherited hash on Python 3.  Keep
    # the identity-based hash explicitly so that instances remain usable
    # as map keys, consistent with the identity-based __eq__().
    __hash__ = object.__hash__

    def sort_key(self):
        """Return the key that should be used for sorting CVSPaths.

        This is a relatively expensive computation, so it is only used
        once, then the results are used to set the ordinal member."""

        return (
            # Sort first by project:
            self.project,
            # Then by directory components:
            self._get_dir_components(),
            )

    def __cmp__(a, b):
        """Order two CVSPaths by their ordinals.

        This method must only be called after ordinal has been set."""

        return cmp(a.ordinal, b.ordinal)
class CVSDirectory(CVSPath):
    """A directory inside a CVS repository.

    Members:

      id -- (int) unique id for this directory.  (Earlier
          documentation also allowed None, meaning "generate a new
          id"; nothing in this class generates ids.  NOTE(review):
          confirm whether any caller still passes None.)

      project -- (Project) the project that contains this directory.

      parent_directory -- (CVSDirectory or None) the CVSDirectory
          that contains this one; None for a directory that has no
          parent.

      rcs_basename -- (string) the base name of the path in the CVS
          repository that corresponds to this CVSDirectory.  For the
          root directory of a project this is ''.

      ordinal -- (int) position of this instance in the sort order
          relative to other CVSPath instances.  See CVSPath.ordinal.

      empty_subdirectory_ids -- (list of int) the ids of the direct
          subdirectories that are empty.  (A directory counts as
          empty when it contains neither RCS files nor non-empty
          subdirectories.)

    """

    __slots__ = ['empty_subdirectory_ids']

    def __init__(self, id, project, parent_directory, rcs_basename):
        """Set up a new CVSDirectory with no empty subdirectories."""

        CVSPath.__init__(
            self, id, project, parent_directory, rcs_basename
            )

        # Starts out empty; CollectData.close() fills it in later.
        self.empty_subdirectory_ids = []

    def get_path_components(self, rcs=False):
        """Return the path components of this directory as a list.

        A directory without a parent contributes no components.  Any
        other directory asks its parent for the parent's components
        (forwarding RCS) and appends its own rcs_basename."""

        parent = self.parent_directory
        if parent is None:
            return []

        components = parent.get_path_components(rcs=rcs)
        components.append(self.rcs_basename)
        return components

    def __getstate__(self):
        """Return a pair: the CVSPath state and the empty-subdir ids."""

        base_state = CVSPath.__getstate__(self)
        return (base_state, self.empty_subdirectory_ids)

    def __setstate__(self, state):
        """Restore this directory from a pair built by __getstate__()."""

        (base_state, empty_ids) = state
        self.empty_subdirectory_ids = empty_ids
        CVSPath.__setstate__(self, base_state)

    def __str__(self):
        """For convenience only; the format may change at any time."""

        path = self.cvs_path
        return path + '/'

    def __repr__(self):
        """Return a debugging representation showing the hex id and path."""

        return 'CVSDirectory<%x>(%r)' % (self.id, str(self))
class CVSFile(CVSPath):
    """A single file (RCS file) inside a CVS repository.

    Members:

      id -- (int) unique id for this file.

      project -- (Project) the project that contains this file.

      parent_directory -- (CVSDirectory) the CVSDirectory that
          contains this CVSFile.  Never None.

      rcs_basename -- (string) the base name of the RCS file in the
          CVS repository that corresponds to this CVSPath, with the
          ',v' suffix removed.

      ordinal -- (int) position of this instance in the sort order
          relative to other CVSPath instances.  See CVSPath.ordinal.

      _in_attic -- (bool) True if the RCS file lives in an Attic
          subdirectory that is not considered the parent directory.
          (If a file is in-and-out-of-attic and one copy is to be
          left in Attic after the conversion, then the Attic
          directory is that file's PARENT_DIRECTORY and _IN_ATTIC is
          False.)

      executable -- (bool) True iff the RCS file has its executable
          bit set.

      file_size -- (long) size of the RCS file in bytes.

      mode -- (string or None) 'kv', 'b', etc., as read from the CVS
          file.

      description -- (string or None) the file description as read
          from the RCS file.

      properties -- (dict) file properties that are preserved across
          the history of this file.  Keys are strings; values are
          strings (the property value) or None (the property should
          be left unset).  CVSRevision.properties can override these.
          Different backends can use these properties for different
          purposes; for cvs2svn they become SVN versioned
          properties.  Property names starting with an underscore
          are reserved for internal cvs2svn purposes.  This member is
          None until determine_file_properties() has been called.

    PARENT_DIRECTORY might contain an 'Attic' component if it should
    be retained in the SVN repository; i.e., if the same filename
    exists out of Attic and the --retain-conflicting-attic-files
    option was specified.

    """

    __slots__ = [
        '_in_attic',
        'executable',
        'file_size',
        'mode',
        'description',
        'properties',
        ]

    def __init__(
            self, id, project, parent_directory, rcs_basename, in_attic,
            executable, file_size, mode, description
            ):
        """Set up a new CVSFile; PARENT_DIRECTORY must not be None."""

        assert parent_directory is not None

        # CVSPath.__init__() calls _calculate_rcs_path(), which in turn
        # needs _in_attic, so that member has to exist before the base
        # class initializer runs.
        self._in_attic = in_attic
        CVSPath.__init__(
            self, id, project, parent_directory, rcs_basename
            )

        self.description = description
        self.mode = mode
        self.file_size = file_size
        self.executable = executable

        # Not known yet; see determine_file_properties().
        self.properties = None

    def determine_file_properties(self, file_property_setters):
        """Compute SELF.properties using FILE_PROPERTY_SETTERS.

        SELF.properties is reset to an empty dict and then each setter
        is given the chance to fill it in, in the order supplied.

        This must only be called after SELF.mode and SELF.description
        have been set by CollectData."""

        self.properties = {}

        for setter in file_property_setters:
            setter.set_properties(self)

    def get_path_components(self, rcs=False):
        """Return the path components of this file as a list.

        The list starts with the parent directory's components.  With
        RCS false, only rcs_basename is appended.  With RCS true, an
        'Attic' component is appended first if the file is in the
        Attic, followed by rcs_basename with ',v' attached."""

        components = self.parent_directory.get_path_components(rcs=rcs)

        if not rcs:
            components.append(self.rcs_basename)
            return components

        if self._in_attic:
            components.append('Attic')
        components.append(self.rcs_basename + ',v')
        return components

    def __getstate__(self):
        """Return the CVSPath state followed by the CVSFile members."""

        base_state = CVSPath.__getstate__(self)
        return (
            base_state,
            self._in_attic,
            self.executable,
            self.file_size,
            self.mode,
            self.description,
            self.properties,
            )

    def __setstate__(self, state):
        """Restore this file from a tuple built by __getstate__()."""

        (
            base_state,
            in_attic, executable, file_size, mode,
            description, properties,
            ) = state

        # The file-specific members are assigned first: restoring the
        # base state recomputes rcs_path, which reads _in_attic.
        self._in_attic = in_attic
        self.executable = executable
        self.file_size = file_size
        self.mode = mode
        self.description = description
        self.properties = properties
        CVSPath.__setstate__(self, base_state)

    def __str__(self):
        """For convenience only; the format may change at any time."""

        path = self.cvs_path
        return path

    def __repr__(self):
        """Return a debugging representation showing the hex id and path."""

        return 'CVSFile<%x>(%r)' % (self.id, str(self))