Use ctx.tmpdir consistently in cvs2git-example.options.
[cvs2svn.git] / cvs2svn_lib / pass_manager.py
blob8d518f8b778435be2fd10dee88aebd243bd4897a
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains tools to manage the passes of a conversion."""
19 import sys
20 import time
21 import platform
22 import gc
24 from cvs2svn_lib import config
25 from cvs2svn_lib.common import FatalError
26 from cvs2svn_lib.context import Ctx
27 from cvs2svn_lib.log import logger
28 from cvs2svn_lib.stats_keeper import StatsKeeper
29 from cvs2svn_lib.stats_keeper import read_stats_keeper
30 from cvs2svn_lib.artifact_manager import artifact_manager
class InvalidPassError(FatalError):
    """Fatal error reporting a problem with a pass specification.

    The message passed in is extended with a hint that points the user
    at the --help-passes option."""

    def __init__(self, msg):
        hint = '\nUse --help-passes for more information.'
        FatalError.__init__(self, msg + hint)
class GarbageCollectionPolicy(object):
    """Abstract description of how garbage is to be handled.

    Concrete policies have to override check_for_garbage()."""

    def check_for_garbage(self):
        """Look for garbage; the base class provides no implementation."""

        raise NotImplementedError()
class DefaultGarbageCollectionPolicy(GarbageCollectionPolicy):
    """Policy that keeps the Python garbage collector as configured."""

    def __init__(self):
        # Nothing is changed; just record the decision in the verbose log.
        message = (
            'Leaving the Python garbage collector at its default settings'
        )
        logger.verbose(message)

    def check_for_garbage(self):
        """Do nothing: the collector runs normally under this policy."""
class NoGarbageCollectionPolicy(GarbageCollectionPolicy):
    """Policy that switches the Python garbage collector off.

    cvs2svn does not create any circular data structures, so CPython's
    reference counting reclaims everything and the cyclic garbage
    collector is unnecessary.  Its automatic runs nevertheless have a
    very measurable performance cost, which is why it gets disabled.

    A programming error could still introduce a circular data structure
    and therefore garbage.  To notice that, check_for_garbage() is run
    once per pass: it performs one explicit collection and, if anything
    was found, reports it at the DEBUG level.

    """

    def __init__(self):
        logger.verbose(
            'Disabling the Python garbage collector (it is unneeded)'
        )
        gc.disable()

    def check_for_garbage(self):
        """Run one explicit collection and report unreachable objects.

        Anything found is logged at the DEBUG level (only if that level
        is enabled) and then dropped from gc.garbage."""

        try:
            # With DEBUG_SAVEALL the collector stores every unreachable
            # object in gc.garbage instead of freeing it, so that the
            # objects can be listed below.
            gc.set_debug(gc.DEBUG_SAVEALL)
            unreachable = gc.collect()
            if not unreachable:
                return

            if logger.is_on(logger.DEBUG):
                logger.debug(
                    'INTERNAL: %d unreachable object(s) were garbage collected:'
                    % (unreachable,)
                )
                for leaked in gc.garbage:
                    logger.debug(' %s' % (leaked,))

            # Empty the list in place so the saved objects are released.
            del gc.garbage[:]
        except (AttributeError, NotImplementedError):
            # Other Python implementations implement garbage collection
            # differently, so if errors occur just ignore them.
            pass
def choose_garbage_collection_policy():
    """Return the GarbageCollectionPolicy instance to be used.

    For CPython, we want to use NoGarbageCollectionPolicy.  But other
    Python implementations (e.g., Jython, PyPy, IronPython) do not
    necessarily use reference-counting for memory management, in which
    case it is not possible to turn off the garbage collector.  So on
    those platforms, use the DefaultGarbageCollectionPolicy.

    Each policy class logs its own verbose message when it is
    constructed, so nothing is logged here."""

    try:
        implementation = platform.python_implementation()
    except AttributeError:
        # platform.python_implementation() was only added in Python 2.6.
        # So if that call failed, we should leave garbage collection on
        # just to be on the safe side.
        implementation = None

    if implementation == 'CPython':
        return NoGarbageCollectionPolicy()

    # DefaultGarbageCollectionPolicy.__init__() already reports that the
    # collector is left at its default settings; logging the same fact
    # here as well would only duplicate the message.
    return DefaultGarbageCollectionPolicy()
class Pass(object):
    """Base class for one step of the conversion.

    Subclasses must implement register_artifacts() and run()."""

    def __init__(self):
        # Unless a subclass changes it, a pass is named after its class:
        self.name = type(self).__name__

    def register_artifacts(self):
        """Register artifacts (created and needed) in artifact_manager.

        Must be overridden; the base class raises NotImplementedError."""

        raise NotImplementedError()

    def _register_temp_file(self, basename):
        """Shorthand for artifact_manager.register_temp_file().

        Registers BASENAME with this pass as the second argument."""

        artifact_manager.register_temp_file(basename, self)

    def _register_temp_file_needed(self, basename):
        """Shorthand for artifact_manager.register_temp_file_needed().

        Registers BASENAME with this pass as the second argument."""

        artifact_manager.register_temp_file_needed(basename, self)

    def run(self, run_options, stats_keeper):
        """Carry out this step of the conversion.

        RUN_OPTIONS is an instance of RunOptions.  STATS_KEEPER is an
        instance of StatsKeeper.  Must be overridden; the base class
        raises NotImplementedError."""

        raise NotImplementedError()
class PassManager(object):
    """Manage a list of passes that can be executed separately or all at once.

    Passes are numbered starting with 1."""

    def __init__(self, passes):
        """Construct a PassManager with the specified PASSES.

        Internally, passes are numbered starting with 1.  So PASSES[0] is
        considered to be pass number 1."""

        self.passes = passes
        self.num_passes = len(self.passes)
        self.garbage_collection_policy = choose_garbage_collection_policy()

    def get_pass_number(self, pass_name, default=None):
        """Return the number of the pass indicated by PASS_NAME.

        PASS_NAME should be a string containing the name or number of a
        pass.  If a number, it should be in the range 1 <= value <=
        self.num_passes.  Return an integer in the same range.  If
        PASS_NAME is the empty string and DEFAULT is specified, return
        DEFAULT.  Raise InvalidPassError if PASS_NAME cannot be converted
        into a valid pass number."""

        if not pass_name and default is not None:
            assert 1 <= default <= self.num_passes
            return default

        # Only the conversion itself is guarded, so that the ValueError
        # handler can never intercept anything but "not an integer".
        try:
            pass_number = int(pass_name)
        except ValueError:
            # Not a number; is pass_name the name of one of the passes?
            for (i, the_pass) in enumerate(self.passes):
                if the_pass.name == pass_name:
                    return i + 1
            raise InvalidPassError('Unknown pass name (%r).' % (pass_name,))

        if not 1 <= pass_number <= self.num_passes:
            raise InvalidPassError(
                'illegal value (%d) for pass number. Must be 1 through %d or\n'
                'the name of a known pass.'
                % (pass_number, self.num_passes,))
        return pass_number

    def run(self, run_options):
        """Run the specified passes, one after another.

        RUN_OPTIONS will be passed to the Passes' run() methods.
        RUN_OPTIONS.start_pass is the number of the first pass that should
        be run.  RUN_OPTIONS.end_pass is the number of the last pass that
        should be run.  It must be that 1 <= RUN_OPTIONS.start_pass <=
        RUN_OPTIONS.end_pass <= self.num_passes."""

        # Convert start_pass and end_pass into the indices of the passes
        # to execute, using the Python index range convention (i.e., first
        # pass executed and first pass *after* the ones that should be
        # executed).
        index_start = run_options.start_pass - 1
        index_end = run_options.end_pass

        # Inform the artifact manager when artifacts are created and used:
        for (i, the_pass) in enumerate(self.passes):
            the_pass.register_artifacts()
            # Each pass creates a new version of the statistics file:
            artifact_manager.register_temp_file(
                config.STATISTICS_FILE % (i + 1,), the_pass
            )
            if i != 0:
                # Each pass subsequent to the first reads the statistics
                # file from the preceding pass:
                artifact_manager.register_temp_file_needed(
                    config.STATISTICS_FILE % (i + 1 - 1,), the_pass
                )

        # Tell the artifact manager about passes that are being skipped
        # this run:
        for the_pass in self.passes[0:index_start]:
            artifact_manager.pass_skipped(the_pass)

        start_time = time.time()
        for i in range(index_start, index_end):
            the_pass = self.passes[i]
            logger.quiet('----- pass %d (%s) -----' % (i + 1, the_pass.name,))
            artifact_manager.pass_started(the_pass)

            if i == 0:
                # The first pass starts from fresh statistics...
                stats_keeper = StatsKeeper()
            else:
                # ...every later one continues from the statistics file
                # written by the preceding pass.
                stats_keeper = read_stats_keeper(
                    artifact_manager.get_temp_file(
                        config.STATISTICS_FILE % (i + 1 - 1,)
                    )
                )

            the_pass.run(run_options, stats_keeper)
            end_time = time.time()
            stats_keeper.log_duration_for_pass(
                end_time - start_time, i + 1, the_pass.name
            )
            logger.normal(stats_keeper.single_pass_timing(i + 1))
            stats_keeper.archive(
                artifact_manager.get_temp_file(
                    config.STATISTICS_FILE % (i + 1,)
                )
            )
            # The end of this pass is the start of the next one's timing.
            start_time = end_time
            Ctx().clean()
            # Allow the artifact manager to clean up artifacts that are no
            # longer needed:
            artifact_manager.pass_done(the_pass, Ctx().skip_cleanup)

            self.garbage_collection_policy.check_for_garbage()

        # Tell the artifact manager about passes that are being deferred:
        for the_pass in self.passes[index_end:]:
            artifact_manager.pass_deferred(the_pass)

        logger.quiet(stats_keeper)
        logger.normal(stats_keeper.timings())

        # Consistency check:
        artifact_manager.check_clean()

    def help_passes(self):
        """Output (to sys.stdout) the indices and names of available passes."""

        # sys.stdout.write() is used rather than the print statement so
        # that the same code produces the same output on Python 2 and 3.
        sys.stdout.write('PASSES:\n')
        for (i, the_pass) in enumerate(self.passes):
            sys.stdout.write('%5d : %s\n' % (i + 1, the_pass.name,))