From 43161af61866412775d44f3d5a0df55cdce7cdd5 Mon Sep 17 00:00:00 2001 From: mhagger Date: Thu, 8 Apr 2010 19:20:47 +0000 Subject: [PATCH] Normalize and cache the paths returned by CVSFile.get_filename(). Patch by: Jon Foster Selected parts of my profiling results: ncalls cumtime function 1 4183 main.py:run_with_profiling 1 2997 collect_data.py:process_project 2M 1957 collect_data.py:transform_symbol 42M 1845 SubtreeSymbolTransform.__does_rule_apply_to 234M 736 os.path.dirname 42M 476 CVSFile.get_filename 42M 285 os.path.normpath 95M* 266 CVSDirectory.get_filename (*including recursive calls, which were about 50% of the total). So CVSFile.get_filename() is responsible for about 10% of cvs2svn's overall runtime. This patch reduces that to almost zero, by calculating the filename when the CVSFile is created, and storing it directly in a CVSFile.filename member variable. git-svn-id: http://cvs2svn.tigris.org/svn/cvs2svn/trunk@5108 be7e6eca-30d4-0310-a8e5-ac0d63af7087 --- cvs2svn_lib/cvs_path.py | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/cvs2svn_lib/cvs_path.py b/cvs2svn_lib/cvs_path.py index 9f08dd57..04fb54ff 100644 --- a/cvs2svn_lib/cvs_path.py +++ b/cvs2svn_lib/cvs_path.py @@ -55,6 +55,7 @@ class CVSPath(object): 'parent_directory', 'basename', 'ordinal', + 'filename', ] def __init__(self, id, project, parent_directory, basename): @@ -63,6 +64,8 @@ class CVSPath(object): self.parent_directory = parent_directory self.basename = basename + self.filename = os.path.normpath(self._calculate_filename()) + def __getstate__(self): """This method must only be called after ordinal has been set.""" @@ -79,6 +82,29 @@ class CVSPath(object): self.ordinal, ) = state self.project = Ctx()._projects[project_id] + self.filename = os.path.normpath(self._calculate_filename()) + + def get_filename(self): + """Return the filesystem path to this CVSPath in the CVS repository. + + This is in native format, and already normalised the way + os.path.normpath() normalises paths. + + It starts with the repository path passed to run_options.add_project() + in the options.py file.""" + + # This turns out to be a hot path through the code. + # It's used by SubtreeSymbolTransform and similar transforms, so it's + # called at least: + # (num_files * num_symbols_per_file * num_subtree_symbol_transforms) + # times. On a large repository with several subtree symbol transforms, + # that can exceed 100,000,000 calls. And _calculate_filename() is quite + # complex, so doing that every time could add about 10 minutes to the + # cvs2svn runtime. + # + # So now we precalculate this and just return it. + + return self.filename def get_ancestry(self): """Return a list of the CVSPaths leading from the root path to SELF. @@ -175,22 +201,21 @@ class CVSDirectory(CVSPath): def __init__(self, id, project, parent_directory, basename): """Initialize a new CVSDirectory object.""" - CVSPath.__init__(self, id, project, parent_directory, basename) # This member is filled in by CollectData.close(): self.empty_subdirectory_ids = [] - def get_filename(self): + CVSPath.__init__(self, id, project, parent_directory, basename) + + def _calculate_filename(self): """Return the filesystem path to this CVSPath in the CVS repository.""" if self.parent_directory is None: return self.project.project_cvs_repos_path else: return os.path.join( - self.parent_directory.get_filename(), self.basename + self.parent_directory.filename, self.basename ) - filename = property(get_filename) - def __getstate__(self): return ( CVSPath.__getstate__(self), @@ -262,16 +287,16 @@ class CVSFile(CVSPath): ): """Initialize a new CVSFile object.""" - CVSPath.__init__(self, id, project, parent_directory, basename) + assert parent_directory is not None + self._in_attic = in_attic self.executable = executable self.file_size = file_size self.mode = mode self.description = description + CVSPath.__init__(self, id, project, parent_directory, basename) - assert self.parent_directory is not None - - def get_filename(self): + def _calculate_filename(self): """Return the filesystem path to this CVSPath in the CVS repository.""" if self._in_attic: @@ -283,8 +308,6 @@ class CVSFile(CVSPath): self.parent_directory.filename, self.basename + ',v' ) - filename = property(get_filename) - def __getstate__(self): return ( CVSPath.__getstate__(self), -- 2.11.4.GIT