Centralize gc policy in a new GarbageCollectionPolicy class.
[cvs2svn.git] / cvs2svn_lib / pass_manager.py
blobe223ce6309a1b3ec6550c076417b5bc3f249f377
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains tools to manage the passes of a conversion."""
20 import time
21 import gc
23 from cvs2svn_lib import config
24 from cvs2svn_lib.common import FatalError
25 from cvs2svn_lib.context import Ctx
26 from cvs2svn_lib.log import logger
27 from cvs2svn_lib.stats_keeper import StatsKeeper
28 from cvs2svn_lib.stats_keeper import read_stats_keeper
29 from cvs2svn_lib.artifact_manager import artifact_manager
class InvalidPassError(FatalError):
    """Fatal error reported when a pass name or number is not valid.

    The message supplied by the caller is extended with a hint that
    points the user at the --help-passes option."""

    def __init__(self, msg):
        hint = '\nUse --help-passes for more information.'
        FatalError.__init__(self, msg + hint)
class GarbageCollectionPolicy(object):
    """Base policy describing how garbage is to be handled.

    This base version takes no action of its own; the Python garbage
    collector is simply left to do its thing."""

    def check_for_garbage(self):
        """Hook for looking for garbage; the base policy does nothing."""

        return None
class NoGarbageCollectionPolicy(GarbageCollectionPolicy):
    """Policy that switches the Python garbage collector off.

    Each call to check_for_garbage() runs the collector exactly once to
    verify that no garbage has accumulated since the previous call.
    Anything that is found is logged at the DEBUG level.

    Rationale: cvs2svn does not create any circular data structures, so
    CPython's reference counting reclaims everything and the cyclic
    garbage collector is unnecessary.  Its automatic runs nevertheless
    have a very measurable performance cost, which is why it is turned
    off.

    A programming error could still easily introduce a circular data
    structure and thereby create garbage.  To catch that,
    check_for_garbage() is run once per pass; if garbage was created it
    is reported at DEBUG level.

    """

    def __init__(self):
        """Disable automatic garbage collection where that is supported."""

        try:
            gc.disable()
        except (AttributeError, NotImplementedError):
            # Python implementations other than CPython handle garbage
            # collection differently; a failure here is simply ignored.
            pass

    def check_for_garbage(self):
        """Run one collection and report any unreachable objects found.

        A DEBUG-level message is generated if any were found."""

        try:
            # With DEBUG_SAVEALL set, unreachable objects are kept in
            # gc.garbage instead of being freed, so they can be listed.
            gc.set_debug(gc.DEBUG_SAVEALL)
            num_unreachable = gc.collect()
            if not num_unreachable:
                return

            if logger.is_on(logger.DEBUG):
                logger.debug(
                    'INTERNAL: %d unreachable object(s) were garbage collected:'
                    % (num_unreachable,)
                )
                for leaked in gc.garbage:
                    logger.debug(' %s' % (leaked,))

            # Empty the saved list so the next check starts clean.
            del gc.garbage[:]
        except (AttributeError, NotImplementedError):
            # Python implementations other than CPython handle garbage
            # collection differently; failures here are simply ignored.
            pass
class Pass(object):
    """Base class for a single step of the conversion.

    Subclasses are expected to override register_artifacts() and run();
    the versions here raise NotImplementedError."""

    def __init__(self):
        # Unless overridden, a pass is named after its own class:
        self.name = type(self).__name__

    def register_artifacts(self):
        """Register artifacts (created and needed) in artifact_manager.

        Must be overridden; this version raises NotImplementedError."""

        raise NotImplementedError()

    def _register_temp_file(self, basename):
        """Shorthand for registering BASENAME as a temp file of this pass."""

        artifact_manager.register_temp_file(basename, self)

    def _register_temp_file_needed(self, basename):
        """Shorthand for registering BASENAME as needed by this pass."""

        artifact_manager.register_temp_file_needed(basename, self)

    def run(self, run_options, stats_keeper):
        """Carry out this step of the conversion.

        RUN_OPTIONS is an instance of RunOptions.  STATS_KEEPER is an
        instance of StatsKeeper.

        Must be overridden; this version raises NotImplementedError."""

        raise NotImplementedError()
class PassManager:
    """Manage a list of passes that can be executed separately or all at once.

    Passes are numbered starting with 1."""

    def __init__(self, passes):
        """Construct a PassManager with the specified PASSES.

        Internally, passes are numbered starting with 1.  So PASSES[0] is
        considered to be pass number 1."""

        self.passes = passes
        self.num_passes = len(self.passes)
        # Consulted once after every executed pass (see run()).
        self.garbage_collection_policy = NoGarbageCollectionPolicy()

    def get_pass_number(self, pass_name, default=None):
        """Return the number of the pass indicated by PASS_NAME.

        PASS_NAME should be a string containing the name or number of a
        pass.  If a number, it should be in the range 1 <= value <=
        self.num_passes.  Return an integer in the same range.  If
        PASS_NAME is the empty string and DEFAULT is specified, return
        DEFAULT.  Raise InvalidPassError if PASS_NAME cannot be converted
        into a valid pass number."""

        if not pass_name and default is not None:
            assert 1 <= default <= self.num_passes
            return default

        # Only the conversion itself is guarded, so that the errors
        # raised below can never be mistaken for a failed conversion.
        try:
            # Does pass_name look like an integer?
            pass_number = int(pass_name)
        except ValueError:
            # Is pass_name the name of one of the passes?
            for (i, the_pass) in enumerate(self.passes):
                if the_pass.name == pass_name:
                    return i + 1
            raise InvalidPassError('Unknown pass name (%r).' % (pass_name,))

        if not 1 <= pass_number <= self.num_passes:
            raise InvalidPassError(
                'illegal value (%d) for pass number. Must be 1 through %d or\n'
                'the name of a known pass.'
                % (pass_number, self.num_passes,))
        return pass_number

    def run(self, run_options):
        """Run the specified passes, one after another.

        RUN_OPTIONS will be passed to the Passes' run() methods.
        RUN_OPTIONS.start_pass is the number of the first pass that should
        be run.  RUN_OPTIONS.end_pass is the number of the last pass that
        should be run.  It must be that 1 <= RUN_OPTIONS.start_pass <=
        RUN_OPTIONS.end_pass <= self.num_passes.  (At least one pass is
        therefore always executed; the final statistics output below
        relies on that.)"""

        # Convert start_pass and end_pass into the indices of the passes
        # to execute, using the Python index range convention (i.e., first
        # pass executed and first pass *after* the ones that should be
        # executed).
        index_start = run_options.start_pass - 1
        index_end = run_options.end_pass

        # Inform the artifact manager when artifacts are created and used.
        # This is done for every pass, not only for those being executed:
        for (i, the_pass) in enumerate(self.passes):
            the_pass.register_artifacts()
            # Each pass creates a new version of the statistics file:
            artifact_manager.register_temp_file(
                config.STATISTICS_FILE % (i + 1,), the_pass
            )
            if i != 0:
                # Each pass subsequent to the first reads the statistics
                # file from the preceding pass:
                artifact_manager.register_temp_file_needed(
                    config.STATISTICS_FILE % (i + 1 - 1,), the_pass
                )

        # Tell the artifact manager about passes that are being skipped
        # this run:
        for the_pass in self.passes[0:index_start]:
            artifact_manager.pass_skipped(the_pass)

        start_time = time.time()
        for i in range(index_start, index_end):
            the_pass = self.passes[i]
            logger.quiet('----- pass %d (%s) -----' % (i + 1, the_pass.name,))
            artifact_manager.pass_started(the_pass)

            if i == 0:
                # The very first pass starts with fresh statistics.
                stats_keeper = StatsKeeper()
            else:
                # Later passes continue from the statistics file archived
                # by the preceding pass.
                stats_keeper = read_stats_keeper(
                    artifact_manager.get_temp_file(
                        config.STATISTICS_FILE % (i + 1 - 1,)
                    )
                )

            the_pass.run(run_options, stats_keeper)
            end_time = time.time()
            stats_keeper.log_duration_for_pass(
                end_time - start_time, i + 1, the_pass.name
            )
            logger.normal(stats_keeper.single_pass_timing(i + 1))
            stats_keeper.archive(
                artifact_manager.get_temp_file(config.STATISTICS_FILE % (i + 1,))
            )
            # The next pass is timed from the moment this one finished.
            start_time = end_time
            Ctx().clean()
            # Allow the artifact manager to clean up artifacts that are no
            # longer needed:
            artifact_manager.pass_done(the_pass, Ctx().skip_cleanup)

            self.garbage_collection_policy.check_for_garbage()

        # Tell the artifact manager about passes that are being deferred:
        for the_pass in self.passes[index_end:]:
            artifact_manager.pass_deferred(the_pass)

        # stats_keeper is the one belonging to the last executed pass.
        logger.quiet(stats_keeper)
        logger.normal(stats_keeper.timings())

        # Consistency check:
        artifact_manager.check_clean()

    def help_passes(self):
        """Output (to sys.stdout) the indices and names of available passes."""

        # A single parenthesized argument prints identically whether
        # print is a statement (Python 2) or a function (Python 3).
        print('PASSES:')
        for (i, the_pass) in enumerate(self.passes):
            print('%5d : %s' % (i + 1, the_pass.name,))