cvs2svn_lib: ensure that files get closed.
cvs2svn.git: cvs2svn_lib/passes.py
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
20 import sys
21 import shutil
22 import cPickle
24 from cvs2svn_lib import config
25 from cvs2svn_lib.context import Ctx
26 from cvs2svn_lib.common import warning_prefix
27 from cvs2svn_lib.common import FatalException
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import DB_OPEN_NEW
31 from cvs2svn_lib.common import DB_OPEN_READ
32 from cvs2svn_lib.common import DB_OPEN_WRITE
33 from cvs2svn_lib.common import Timestamper
34 from cvs2svn_lib.sort import sort_file
35 from cvs2svn_lib.log import logger
36 from cvs2svn_lib.pass_manager import Pass
37 from cvs2svn_lib.serializer import PrimedPickleSerializer
38 from cvs2svn_lib.artifact_manager import artifact_manager
39 from cvs2svn_lib.cvs_path_database import CVSPathDatabase
40 from cvs2svn_lib.metadata_database import MetadataDatabase
41 from cvs2svn_lib.project import read_projects
42 from cvs2svn_lib.project import write_projects
43 from cvs2svn_lib.symbol import LineOfDevelopment
44 from cvs2svn_lib.symbol import Trunk
45 from cvs2svn_lib.symbol import Symbol
46 from cvs2svn_lib.symbol import Branch
47 from cvs2svn_lib.symbol import Tag
48 from cvs2svn_lib.symbol import ExcludedSymbol
49 from cvs2svn_lib.symbol_database import SymbolDatabase
50 from cvs2svn_lib.symbol_database import create_symbol_database
51 from cvs2svn_lib.symbol_statistics import SymbolPlanError
52 from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
53 from cvs2svn_lib.symbol_statistics import SymbolStatistics
54 from cvs2svn_lib.cvs_item import CVSRevision
55 from cvs2svn_lib.cvs_item import CVSSymbol
56 from cvs2svn_lib.cvs_item_database import OldCVSItemStore
57 from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
58 from cvs2svn_lib.cvs_item_database import cvs_item_primer
59 from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib.key_generator import KeyGenerator
64 from cvs2svn_lib.changeset import RevisionChangeset
65 from cvs2svn_lib.changeset import OrderedChangeset
66 from cvs2svn_lib.changeset import SymbolChangeset
67 from cvs2svn_lib.changeset import BranchChangeset
68 from cvs2svn_lib.changeset import create_symbol_changeset
69 from cvs2svn_lib.changeset_graph import ChangesetGraph
70 from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
71 from cvs2svn_lib.changeset_database import ChangesetDatabase
72 from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
73 from cvs2svn_lib.svn_commit import SVNRevisionCommit
74 from cvs2svn_lib.openings_closings import SymbolingsLogger
75 from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
76 from cvs2svn_lib.persistence_manager import PersistenceManager
77 from cvs2svn_lib.repository_walker import walk_repository
78 from cvs2svn_lib.collect_data import CollectData
79 from cvs2svn_lib.check_dependencies_pass \
80 import CheckItemStoreDependenciesPass
81 from cvs2svn_lib.check_dependencies_pass \
82 import CheckIndexedItemStoreDependenciesPass
85 class CollectRevsPass(Pass):
86 """This pass was formerly known as pass1."""
88 def register_artifacts(self):
89 self._register_temp_file(config.PROJECTS)
90 self._register_temp_file(config.SYMBOL_STATISTICS)
91 self._register_temp_file(config.METADATA_INDEX_TABLE)
92 self._register_temp_file(config.METADATA_STORE)
93 self._register_temp_file(config.CVS_PATHS_DB)
94 self._register_temp_file(config.CVS_ITEMS_STORE)
96 def run(self, run_options, stats_keeper):
97 logger.quiet("Examining all CVS ',v' files...")
98 Ctx()._projects = {}
99 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_NEW)
100 cd = CollectData(stats_keeper)
102 # Key generator for CVSFiles:
103 file_key_generator = KeyGenerator()
105 for project in run_options.projects:
106 Ctx()._projects[project.id] = project
107 cd.process_project(
108 project,
109 walk_repository(project, file_key_generator, cd.record_fatal_error),
110 )
111 run_options.projects = None
113 fatal_errors = cd.close()
115 if fatal_errors:
116 raise FatalException("Pass 1 complete.\n"
117 + "=" * 75 + "\n"
118 + "Error summary:\n"
119 + "\n".join(fatal_errors) + "\n"
120 + "Exited due to fatal error(s).")
122 Ctx()._cvs_path_db.close()
123 write_projects(artifact_manager.get_temp_file(config.PROJECTS))
124 logger.quiet("Done")
127 class CleanMetadataPass(Pass):
128 """Clean up CVS revision metadata and write it to a new database."""
130 def register_artifacts(self):
131 self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
132 self._register_temp_file(config.METADATA_CLEAN_STORE)
133 self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
134 self._register_temp_file_needed(config.METADATA_STORE)
136 def _get_clean_author(self, author):
137 """Return AUTHOR, converted appropriately to UTF8.
139 Raise a UnicodeError if it cannot be converted using the
140 configured cvs_author_decoder."""
142 try:
143 return self._authors[author]
144 except KeyError:
145 pass
147 try:
148 clean_author = Ctx().cvs_author_decoder(author)
149 except UnicodeError:
150 self._authors[author] = author
151 raise UnicodeError('Problem decoding author \'%s\'' % (author,))
153 try:
154 clean_author = clean_author.encode('utf8')
155 except UnicodeError:
156 self._authors[author] = author
157 raise UnicodeError('Problem encoding author \'%s\'' % (author,))
159 self._authors[author] = clean_author
160 return clean_author
162 def _get_clean_log_msg(self, log_msg):
163 """Return LOG_MSG, converted appropriately to UTF8.
165 Raise a UnicodeError if it cannot be converted using the
166 configured cvs_log_decoder."""
168 try:
169 clean_log_msg = Ctx().cvs_log_decoder(log_msg)
170 except UnicodeError:
171 raise UnicodeError(
172 'Problem decoding log message:\n'
173 '%s\n'
174 '%s\n'
175 '%s'
176 % ('-' * 75, log_msg, '-' * 75,)
177 )
179 try:
180 return clean_log_msg.encode('utf8')
181 except UnicodeError:
182 raise UnicodeError(
183 'Problem encoding log message:\n'
184 '%s\n'
185 '%s\n'
186 '%s'
187 % ('-' * 75, log_msg, '-' * 75,)
188 )
190 def _clean_metadata(self, metadata):
191 """Clean up METADATA by overwriting its members as necessary."""
193 try:
194 metadata.author = self._get_clean_author(metadata.author)
195 except UnicodeError, e:
196 logger.warn('%s: %s' % (warning_prefix, e,))
197 self.warnings = True
199 try:
200 metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
201 except UnicodeError, e:
202 logger.warn('%s: %s' % (warning_prefix, e,))
203 self.warnings = True
205 def run(self, run_options, stats_keeper):
206 logger.quiet("Converting metadata to UTF8...")
207 metadata_db = MetadataDatabase(
208 artifact_manager.get_temp_file(config.METADATA_STORE),
209 artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
210 DB_OPEN_READ,
211 )
212 metadata_clean_db = MetadataDatabase(
213 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
214 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
215 DB_OPEN_NEW,
216 )
218 self.warnings = False
220 # A map {author : clean_author} for those known (to avoid
221 # repeating warnings):
222 self._authors = {}
224 for id in metadata_db.iterkeys():
225 metadata = metadata_db[id]
227 # Record the original author name because it might be needed for
228 # expanding CVS keywords:
229 metadata.original_author = metadata.author
231 self._clean_metadata(metadata)
233 metadata_clean_db[id] = metadata
235 if self.warnings:
236 raise FatalError(
237 'There were warnings converting author names and/or log messages\n'
238 'to Unicode (see messages above). Please restart this pass\n'
239 'with one or more \'--encoding\' parameters or with\n'
240 '\'--fallback-encoding\'.'
241 )
243 metadata_clean_db.close()
244 metadata_db.close()
245 logger.quiet("Done")
248 class CollateSymbolsPass(Pass):
249 """Divide symbols into branches, tags, and excludes."""
251 conversion_names = {
252 Trunk : 'trunk',
253 Branch : 'branch',
254 Tag : 'tag',
255 ExcludedSymbol : 'exclude',
256 Symbol : '.',
257 }
259 def register_artifacts(self):
260 self._register_temp_file(config.SYMBOL_DB)
261 self._register_temp_file_needed(config.PROJECTS)
262 self._register_temp_file_needed(config.SYMBOL_STATISTICS)
264 def get_symbol(self, run_options, stats):
265 """Use StrategyRules to decide what to do with a symbol.
267 STATS is an instance of symbol_statistics._Stats describing an
268 instance of Symbol or Trunk. To determine how the symbol is to be
269 converted, consult the StrategyRules in the project's
270 symbol_strategy_rules. Each rule is allowed a chance to change
271 the way the symbol will be converted. If the symbol is not a
272 Trunk or TypedSymbol after all rules have run, raise
273 IndeterminateSymbolException."""
275 symbol = stats.lod
276 rules = run_options.project_symbol_strategy_rules[symbol.project.id]
277 for rule in rules:
278 symbol = rule.get_symbol(symbol, stats)
279 assert symbol is not None
281 stats.check_valid(symbol)
283 return symbol
285 def log_symbol_summary(self, stats, symbol):
286 if not self.symbol_info_file:
287 return
289 if isinstance(symbol, Trunk):
290 name = '.trunk.'
291 preferred_parent_name = '.'
292 else:
293 name = stats.lod.name
294 if symbol.preferred_parent_id is None:
295 preferred_parent_name = '.'
296 else:
297 preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
298 if isinstance(preferred_parent, Trunk):
299 preferred_parent_name = '.trunk.'
300 else:
301 preferred_parent_name = preferred_parent.name
303 if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
304 symbol_path = symbol.base_path
305 else:
306 symbol_path = '.'
308 self.symbol_info_file.write(
309 '%-5d %-30s %-10s %s %s\n' % (
310 stats.lod.project.id,
311 name,
312 self.conversion_names[symbol.__class__],
313 symbol_path,
314 preferred_parent_name,
315 )
316 )
317 self.symbol_info_file.write(' # %s\n' % (stats,))
318 parent_counts = stats.possible_parents.items()
319 if parent_counts:
320 self.symbol_info_file.write(' # Possible parents:\n')
321 parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
322 for (pp, count) in parent_counts:
323 if isinstance(pp, Trunk):
324 self.symbol_info_file.write(
325 ' # .trunk. : %d\n' % (count,)
326 )
327 else:
328 self.symbol_info_file.write(
329 ' # %s : %d\n' % (pp.name, count,)
330 )
332 def get_symbols(self, run_options):
333 """Return a map telling how to convert symbols.
335 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
336 indicating how each symbol should be converted. Trunk objects in
337 SYMBOL_STATS are passed through unchanged. One object is included
338 in the return value for each line of development described in
339 SYMBOL_STATS.
341 Raise FatalError if there was an error."""
343 errors = []
344 mismatches = []
346 if Ctx().symbol_info_filename is not None:
347 self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
348 self.symbol_info_file.write(
349 '# Columns: project_id symbol_name conversion symbol_path '
350 'preferred_parent_name\n'
351 )
352 else:
353 self.symbol_info_file = None
355 # Initialize each symbol strategy rule a single time, even if it
356 # is used in more than one project. First define a map from
357 # object id to symbol strategy rule:
358 rules = {}
359 for rule_list in run_options.project_symbol_strategy_rules:
360 for rule in rule_list:
361 rules[id(rule)] = rule
363 for rule in rules.itervalues():
364 rule.start(self.symbol_stats)
366 retval = {}
368 for stats in self.symbol_stats:
369 try:
370 symbol = self.get_symbol(run_options, stats)
371 except IndeterminateSymbolException, e:
372 self.log_symbol_summary(stats, stats.lod)
373 mismatches.append(e.stats)
374 except SymbolPlanError, e:
375 self.log_symbol_summary(stats, stats.lod)
376 errors.append(e)
377 else:
378 self.log_symbol_summary(stats, symbol)
379 retval[stats.lod] = symbol
381 for rule in rules.itervalues():
382 rule.finish()
384 if self.symbol_info_file:
385 self.symbol_info_file.close()
387 del self.symbol_info_file
389 if errors or mismatches:
390 s = ['Problems determining how symbols should be converted:\n']
391 for e in errors:
392 s.append('%s\n' % (e,))
393 if mismatches:
394 s.append(
395 'It is not clear how the following symbols '
396 'should be converted.\n'
397 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
398 'and/or\n'
399 '--symbol-default to resolve the ambiguity.\n'
400 )
401 for stats in mismatches:
402 s.append(' %s\n' % (stats,))
403 raise FatalError(''.join(s))
404 else:
405 return retval
407 def run(self, run_options, stats_keeper):
408 Ctx()._projects = read_projects(
409 artifact_manager.get_temp_file(config.PROJECTS)
410 )
411 self.symbol_stats = SymbolStatistics(
412 artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
413 )
415 symbol_map = self.get_symbols(run_options)
417 # Check the symbols for consistency and bail out if there were errors:
418 self.symbol_stats.check_consistency(symbol_map)
420 # Check that the symbols all have SVN paths set and that the paths
421 # are disjoint:
422 Ctx().output_option.check_symbols(symbol_map)
424 for symbol in symbol_map.itervalues():
425 if isinstance(symbol, ExcludedSymbol):
426 self.symbol_stats.exclude_symbol(symbol)
428 create_symbol_database(symbol_map.values())
430 del self.symbol_stats
432 logger.quiet("Done")
435 class FilterSymbolsPass(Pass):
436 """Delete any branches/tags that are to be excluded.
438 Also delete revisions on excluded branches, and delete other
439 references to the excluded symbols."""
441 def register_artifacts(self):
442 self._register_temp_file(config.ITEM_SERIALIZER)
443 self._register_temp_file(config.CVS_REVS_DATAFILE)
444 self._register_temp_file(config.CVS_SYMBOLS_DATAFILE)
445 self._register_temp_file_needed(config.PROJECTS)
446 self._register_temp_file_needed(config.SYMBOL_DB)
447 self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
448 self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
449 self._register_temp_file_needed(config.CVS_PATHS_DB)
450 self._register_temp_file_needed(config.CVS_ITEMS_STORE)
451 Ctx().revision_collector.register_artifacts(self)
453 def run(self, run_options, stats_keeper):
454 Ctx()._projects = read_projects(
455 artifact_manager.get_temp_file(config.PROJECTS)
456 )
457 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
458 Ctx()._metadata_db = MetadataDatabase(
459 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
460 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
461 DB_OPEN_READ,
462 )
463 Ctx()._symbol_db = SymbolDatabase()
464 cvs_item_store = OldCVSItemStore(
465 artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
467 cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
468 f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'wb')
469 cPickle.dump(cvs_item_serializer, f, -1)
470 f.close()
472 rev_db = NewSortableCVSRevisionDatabase(
473 artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
474 cvs_item_serializer,
475 )
477 symbol_db = NewSortableCVSSymbolDatabase(
478 artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
479 cvs_item_serializer,
480 )
482 revision_collector = Ctx().revision_collector
484 logger.quiet("Filtering out excluded symbols and summarizing items...")
486 stats_keeper.reset_cvs_rev_info()
487 revision_collector.start()
489 # Process the cvs items store one file at a time:
490 for cvs_file_items in cvs_item_store.iter_cvs_file_items():
491 logger.verbose(cvs_file_items.cvs_file.rcs_path)
492 cvs_file_items.filter_excluded_symbols()
493 cvs_file_items.mutate_symbols()
494 cvs_file_items.adjust_parents()
495 cvs_file_items.refine_symbols()
496 cvs_file_items.determine_revision_properties(
497 Ctx().revision_property_setters
498 )
499 cvs_file_items.record_opened_symbols()
500 cvs_file_items.record_closed_symbols()
501 cvs_file_items.check_link_consistency()
503 # Give the revision collector a chance to collect data about the
504 # file:
505 revision_collector.process_file(cvs_file_items)
507 # Store whatever is left to the new file and update statistics:
508 stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
509 for cvs_item in cvs_file_items.values():
510 stats_keeper.record_cvs_item(cvs_item)
512 if isinstance(cvs_item, CVSRevision):
513 rev_db.add(cvs_item)
514 elif isinstance(cvs_item, CVSSymbol):
515 symbol_db.add(cvs_item)
517 stats_keeper.set_stats_reflect_exclude(True)
519 rev_db.close()
520 symbol_db.close()
521 revision_collector.finish()
522 cvs_item_store.close()
523 Ctx()._symbol_db.close()
524 Ctx()._cvs_path_db.close()
526 logger.quiet("Done")
529 class SortRevisionsPass(Pass):
530 """Sort the revisions file."""
532 def register_artifacts(self):
533 self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
534 self._register_temp_file_needed(config.CVS_REVS_DATAFILE)
536 def run(self, run_options, stats_keeper):
537 logger.quiet("Sorting CVS revision summaries...")
538 sort_file(
539 artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
540 artifact_manager.get_temp_file(
541 config.CVS_REVS_SORTED_DATAFILE
542 ),
543 tempdirs=[Ctx().tmpdir],
544 )
545 logger.quiet("Done")
548 class SortSymbolsPass(Pass):
549 """Sort the symbols file."""
551 def register_artifacts(self):
552 self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
553 self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)
555 def run(self, run_options, stats_keeper):
556 logger.quiet("Sorting CVS symbol summaries...")
557 sort_file(
558 artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
559 artifact_manager.get_temp_file(
560 config.CVS_SYMBOLS_SORTED_DATAFILE
561 ),
562 tempdirs=[Ctx().tmpdir],
563 )
564 logger.quiet("Done")
567 class InitializeChangesetsPass(Pass):
568 """Create preliminary CommitSets."""
570 def register_artifacts(self):
571 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
572 self._register_temp_file(config.CHANGESETS_STORE)
573 self._register_temp_file(config.CHANGESETS_INDEX)
574 self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
575 self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
576 self._register_temp_file_needed(config.PROJECTS)
577 self._register_temp_file_needed(config.SYMBOL_DB)
578 self._register_temp_file_needed(config.CVS_PATHS_DB)
579 self._register_temp_file_needed(config.ITEM_SERIALIZER)
580 self._register_temp_file_needed(config.CVS_REVS_SORTED_DATAFILE)
581 self._register_temp_file_needed(
582 config.CVS_SYMBOLS_SORTED_DATAFILE)
584 def get_revision_changesets(self):
585 """Generate revision changesets, one at a time.
587 Each time, yield a list of CVSRevisions that might potentially
588 constitute a changeset."""
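# Illustrative sketch (hypothetical numbers): with a COMMIT_THRESHOLD of,
# say, 300 seconds, revisions sharing metadata_id 7 at timestamps 1000,
# 1100, and 1250 would be accumulated into a single candidate list below,
# since each consecutive gap stays within the threshold; a revision with a
# different metadata_id, or one whose timestamp jumps to 1700, would start
# a new candidate list.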
590 # Create changesets for CVSRevisions:
591 old_metadata_id = None
592 old_timestamp = None
593 changeset_items = []
595 db = OldSortableCVSRevisionDatabase(
596 artifact_manager.get_temp_file(
597 config.CVS_REVS_SORTED_DATAFILE
598 ),
599 self.cvs_item_serializer,
600 )
602 for cvs_rev in db:
603 if cvs_rev.metadata_id != old_metadata_id \
604 or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
605 # Start a new changeset. First finish up the old changeset,
606 # if any:
607 if changeset_items:
608 yield changeset_items
609 changeset_items = []
610 old_metadata_id = cvs_rev.metadata_id
611 changeset_items.append(cvs_rev)
612 old_timestamp = cvs_rev.timestamp
614 # Finish up the last changeset, if any:
615 if changeset_items:
616 yield changeset_items
618 def get_symbol_changesets(self):
619 """Generate symbol changesets, one at a time.
621 Each time, yield a list of CVSSymbols that might potentially
622 constitute a changeset."""
624 old_symbol_id = None
625 changeset_items = []
627 db = OldSortableCVSSymbolDatabase(
628 artifact_manager.get_temp_file(
629 config.CVS_SYMBOLS_SORTED_DATAFILE
630 ),
631 self.cvs_item_serializer,
632 )
634 for cvs_symbol in db:
635 if cvs_symbol.symbol.id != old_symbol_id:
636 # Start a new changeset. First finish up the old changeset,
637 # if any:
638 if changeset_items:
639 yield changeset_items
640 changeset_items = []
641 old_symbol_id = cvs_symbol.symbol.id
642 changeset_items.append(cvs_symbol)
644 # Finish up the last changeset, if any:
645 if changeset_items:
646 yield changeset_items
648 @staticmethod
649 def compare_items(a, b):
650 return (
651 cmp(a.timestamp, b.timestamp)
652 or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
653 or cmp([int(x) for x in a.rev.split('.')],
654 [int(x) for x in b.rev.split('.')])
655 or cmp(a.id, b.id))
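# Note on the ordering above: revision numbers are compared as lists of
# integers, so a hypothetical revision '1.10' sorts after '1.9', which a
# plain string comparison would get wrong; the item id serves only as a
# final tie-breaker to keep the ordering deterministic.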
657 def break_internal_dependencies(self, changeset_items):
658 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
660 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
661 belong in a single RevisionChangeset, but there might be internal
662 dependencies among the items. Return a list of lists, where each
663 sublist is a list of CVSRevisions and at least one internal
664 dependency has been eliminated. Iff CHANGESET_ITEMS does not have
665 to be split, then the return value will contain a single value,
666 namely the original value of CHANGESET_ITEMS. Split
667 CHANGESET_ITEMS at most once, even though the resulting changesets
668 might themselves have internal dependencies."""
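# Worked example (hypothetical items): if the sorted items are A, B, C, D
# with internal dependencies A->C and B->D, the difference array built
# below starts as breaks = [1, 1, -1, -1] and becomes [1, 2, 1, 0] after
# the running sum, i.e. breaks[i] counts the dependencies crossing the
# boundary after index i.  The maximum is at i = 1, so the list is split
# into [A, B] and [C, D], eliminating both dependencies.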
670 # We only look for succ dependencies, since by doing so we
671 # automatically cover pred dependencies as well. First create a
672 # list of tuples (pred, succ) of id pairs for CVSItems that depend
673 # on each other.
674 dependencies = []
675 changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
676 for cvs_item in changeset_items:
677 for next_id in cvs_item.get_succ_ids():
678 if next_id in changeset_cvs_item_ids:
679 # Sanity check: a CVSItem should never depend on itself:
680 if next_id == cvs_item.id:
681 raise InternalError('Item depends on itself: %s' % (cvs_item,))
683 dependencies.append((cvs_item.id, next_id,))
685 if dependencies:
686 # Sort the changeset_items in a defined order (chronological to the
687 # extent that the timestamps are correct and unique).
688 changeset_items.sort(self.compare_items)
689 indexes = {}
690 for (i, changeset_item) in enumerate(changeset_items):
691 indexes[changeset_item.id] = i
692 # How many internal dependencies would be broken by breaking the
693 # Changeset after a particular index?
694 breaks = [0] * len(changeset_items)
695 for (pred, succ,) in dependencies:
696 pred_index = indexes[pred]
697 succ_index = indexes[succ]
698 breaks[min(pred_index, succ_index)] += 1
699 breaks[max(pred_index, succ_index)] -= 1
700 best_i = None
701 best_count = -1
702 best_time = 0
703 for i in range(1, len(breaks)):
704 breaks[i] += breaks[i - 1]
705 for i in range(0, len(breaks) - 1):
706 if breaks[i] > best_count:
707 best_i = i
708 best_count = breaks[i]
709 best_time = (changeset_items[i + 1].timestamp
710 - changeset_items[i].timestamp)
711 elif breaks[i] == best_count \
712 and (changeset_items[i + 1].timestamp
713 - changeset_items[i].timestamp) < best_time:
714 best_i = i
715 best_count = breaks[i]
716 best_time = (changeset_items[i + 1].timestamp
717 - changeset_items[i].timestamp)
718 # Reuse the old changeset.id for the first of the split changesets.
719 return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
720 else:
721 return [changeset_items]
723 def break_all_internal_dependencies(self, changeset_items):
724 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
726 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
727 be part of a single changeset. Break this list into sublists,
728 where the CVSRevisions in each sublist are free of mutual
729 dependencies."""
731 # This method is written non-recursively to avoid any possible
732 # problems with recursion depth.
734 changesets_to_split = [changeset_items]
735 while changesets_to_split:
736 changesets = self.break_internal_dependencies(changesets_to_split.pop())
737 if len(changesets) == 1:
738 [changeset_items] = changesets
739 yield changeset_items
740 else:
741 # The changeset had to be split; see if either of the
742 # fragments have to be split:
743 changesets.reverse()
744 changesets_to_split.extend(changesets)
746 def get_changesets(self):
747 """Generate (Changeset, [CVSItem,...]) for all changesets.
749 The Changesets already have their internal dependencies broken.
750 The [CVSItem,...] list is the list of CVSItems in the
751 corresponding Changeset."""
753 for changeset_items in self.get_revision_changesets():
754 for split_changeset_items \
755 in self.break_all_internal_dependencies(changeset_items):
756 yield (
757 RevisionChangeset(
758 self.changeset_key_generator.gen_id(),
759 [cvs_rev.id for cvs_rev in split_changeset_items]
760 ),
761 split_changeset_items,
762 )
764 for changeset_items in self.get_symbol_changesets():
765 yield (
766 create_symbol_changeset(
767 self.changeset_key_generator.gen_id(),
768 changeset_items[0].symbol,
769 [cvs_symbol.id for cvs_symbol in changeset_items]
770 ),
771 changeset_items,
772 )
774 def run(self, run_options, stats_keeper):
775 logger.quiet("Creating preliminary commit sets...")
777 Ctx()._projects = read_projects(
778 artifact_manager.get_temp_file(config.PROJECTS)
779 )
780 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
781 Ctx()._symbol_db = SymbolDatabase()
783 f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'rb')
784 self.cvs_item_serializer = cPickle.load(f)
785 f.close()
787 changeset_db = ChangesetDatabase(
788 artifact_manager.get_temp_file(config.CHANGESETS_STORE),
789 artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
790 DB_OPEN_NEW,
791 )
792 cvs_item_to_changeset_id = CVSItemToChangesetTable(
793 artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
794 DB_OPEN_NEW,
795 )
797 self.sorted_cvs_items_db = IndexedCVSItemStore(
798 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
799 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
800 DB_OPEN_NEW)
802 self.changeset_key_generator = KeyGenerator()
804 for (changeset, changeset_items) in self.get_changesets():
805 if logger.is_on(logger.DEBUG):
806 logger.debug(repr(changeset))
807 changeset_db.store(changeset)
808 for cvs_item in changeset_items:
809 self.sorted_cvs_items_db.add(cvs_item)
810 cvs_item_to_changeset_id[cvs_item.id] = changeset.id
812 self.sorted_cvs_items_db.close()
813 cvs_item_to_changeset_id.close()
814 changeset_db.close()
815 Ctx()._symbol_db.close()
816 Ctx()._cvs_path_db.close()
818 del self.cvs_item_serializer
820 logger.quiet("Done")
823 class ProcessedChangesetLogger:
824 def __init__(self):
825 self.processed_changeset_ids = []
827 def log(self, changeset_id):
828 if logger.is_on(logger.DEBUG):
829 self.processed_changeset_ids.append(changeset_id)
831 def flush(self):
832 if self.processed_changeset_ids:
833 logger.debug(
834 'Consumed changeset ids %s'
835 % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))
837 del self.processed_changeset_ids[:]
840 class BreakRevisionChangesetCyclesPass(Pass):
841 """Break up any dependency cycles involving only RevisionChangesets."""
843 def register_artifacts(self):
844 self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
845 self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
846 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
847 self._register_temp_file_needed(config.PROJECTS)
848 self._register_temp_file_needed(config.SYMBOL_DB)
849 self._register_temp_file_needed(config.CVS_PATHS_DB)
850 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
851 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
852 self._register_temp_file_needed(config.CHANGESETS_STORE)
853 self._register_temp_file_needed(config.CHANGESETS_INDEX)
854 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)
856 def get_source_changesets(self):
857 old_changeset_db = ChangesetDatabase(
858 artifact_manager.get_temp_file(config.CHANGESETS_STORE),
859 artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
860 DB_OPEN_READ)
862 changeset_ids = old_changeset_db.keys()
864 for changeset_id in changeset_ids:
865 yield old_changeset_db[changeset_id]
867 old_changeset_db.close()
868 del old_changeset_db
870 def break_cycle(self, cycle):
871 """Break up one or more changesets in CYCLE to help break the cycle.
873 CYCLE is a list of Changesets where
875 cycle[i] depends on cycle[i - 1]
877 Break up one or more changesets in CYCLE to make progress towards
878 breaking the cycle. Update self.changeset_graph accordingly.
880 It is not guaranteed that the cycle will be broken by one call to
881 this routine, but at least some progress must be made."""
883 self.processed_changeset_logger.flush()
884 best_i = None
885 best_link = None
886 for i in range(len(cycle)):
887 # It's OK if this index wraps to -1:
888 link = ChangesetGraphLink(
889 cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])
891 if best_i is None or link < best_link:
892 best_i = i
893 best_link = link
895 if logger.is_on(logger.DEBUG):
896 logger.debug(
897 'Breaking cycle %s by breaking node %x' % (
898 ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
899 best_link.changeset.id,))
901 new_changesets = best_link.break_changeset(self.changeset_key_generator)
903 self.changeset_graph.delete_changeset(best_link.changeset)
905 for changeset in new_changesets:
906 self.changeset_graph.add_new_changeset(changeset)
908 def run(self, run_options, stats_keeper):
909 logger.quiet("Breaking revision changeset dependency cycles...")
911 Ctx()._projects = read_projects(
912 artifact_manager.get_temp_file(config.PROJECTS)
913 )
914 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
915 Ctx()._symbol_db = SymbolDatabase()
916 Ctx()._cvs_items_db = IndexedCVSItemStore(
917 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
918 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
919 DB_OPEN_READ)
921 shutil.copyfile(
922 artifact_manager.get_temp_file(
923 config.CVS_ITEM_TO_CHANGESET),
924 artifact_manager.get_temp_file(
925 config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
926 cvs_item_to_changeset_id = CVSItemToChangesetTable(
927 artifact_manager.get_temp_file(
928 config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
929 DB_OPEN_WRITE)
931 changeset_db = ChangesetDatabase(
932 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
933 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
934 DB_OPEN_NEW)
936 self.changeset_graph = ChangesetGraph(
937 changeset_db, cvs_item_to_changeset_id
938 )
940 max_changeset_id = 0
941 for changeset in self.get_source_changesets():
942 changeset_db.store(changeset)
943 if isinstance(changeset, RevisionChangeset):
944 self.changeset_graph.add_changeset(changeset)
945 max_changeset_id = max(max_changeset_id, changeset.id)
947 self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
949 self.processed_changeset_logger = ProcessedChangesetLogger()
951 # Consume the graph, breaking cycles using self.break_cycle():
952 for (changeset, time_range) in self.changeset_graph.consume_graph(
953 cycle_breaker=self.break_cycle
954 ):
955 self.processed_changeset_logger.log(changeset.id)
957 self.processed_changeset_logger.flush()
958 del self.processed_changeset_logger
960 self.changeset_graph.close()
961 self.changeset_graph = None
962 Ctx()._cvs_items_db.close()
963 Ctx()._symbol_db.close()
964 Ctx()._cvs_path_db.close()
966 logger.quiet("Done")
969 class RevisionTopologicalSortPass(Pass):
970 """Sort RevisionChangesets into commit order.
972 Also convert them to OrderedChangesets, without changing their ids."""
974 def register_artifacts(self):
975 self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
976 self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
977 self._register_temp_file_needed(config.PROJECTS)
978 self._register_temp_file_needed(config.SYMBOL_DB)
979 self._register_temp_file_needed(config.CVS_PATHS_DB)
980 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
981 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
982 self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
983 self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
984 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
986 def get_source_changesets(self, changeset_db):
987 changeset_ids = changeset_db.keys()
989 for changeset_id in changeset_ids:
990 yield changeset_db[changeset_id]
992 def get_changesets(self):
993 changeset_db = ChangesetDatabase(
994 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
995 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
996 DB_OPEN_READ,
997 )
999 changeset_graph = ChangesetGraph(
1000 changeset_db,
1001 CVSItemToChangesetTable(
1002 artifact_manager.get_temp_file(
1003 config.CVS_ITEM_TO_CHANGESET_REVBROKEN
1004 ),
1005 DB_OPEN_READ,
1006 ),
1007 )
1009 for changeset in self.get_source_changesets(changeset_db):
1010 if isinstance(changeset, RevisionChangeset):
1011 changeset_graph.add_changeset(changeset)
1012 else:
1013 yield changeset
1015 changeset_ids = []
1017 # Sentry:
1018 changeset_ids.append(None)
1020 for (changeset, time_range) in changeset_graph.consume_graph():
1021 changeset_ids.append(changeset.id)
1023 # Sentry:
1024 changeset_ids.append(None)
1026 for i in range(1, len(changeset_ids) - 1):
1027 changeset = changeset_db[changeset_ids[i]]
1028 yield OrderedChangeset(
1029 changeset.id, changeset.cvs_item_ids, i - 1,
1030 changeset_ids[i - 1], changeset_ids[i + 1])
1032 changeset_graph.close()
1034 def run(self, run_options, stats_keeper):
1035 logger.quiet("Generating CVSRevisions in commit order...")
1037 Ctx()._projects = read_projects(
1038 artifact_manager.get_temp_file(config.PROJECTS)
1039 )
1040 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1041 Ctx()._symbol_db = SymbolDatabase()
1042 Ctx()._cvs_items_db = IndexedCVSItemStore(
1043 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1044 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1045 DB_OPEN_READ)
1047 changesets_revordered_db = ChangesetDatabase(
1048 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
1049 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
1050 DB_OPEN_NEW)
1052 for changeset in self.get_changesets():
1053 changesets_revordered_db.store(changeset)
1055 changesets_revordered_db.close()
1056 Ctx()._cvs_items_db.close()
1057 Ctx()._symbol_db.close()
1058 Ctx()._cvs_path_db.close()
1060 logger.quiet("Done")
1063 class BreakSymbolChangesetCyclesPass(Pass):
1064 """Break up any dependency cycles involving only SymbolChangesets."""
1066 def register_artifacts(self):
1067 self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
1068 self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
1069 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
1070 self._register_temp_file_needed(config.PROJECTS)
1071 self._register_temp_file_needed(config.SYMBOL_DB)
1072 self._register_temp_file_needed(config.CVS_PATHS_DB)
1073 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1074 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1075 self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
1076 self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
1077 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
1079 def get_source_changesets(self):
1080 old_changeset_db = ChangesetDatabase(
1081 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
1082 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
1083 DB_OPEN_READ)
1085 changeset_ids = old_changeset_db.keys()
1087 for changeset_id in changeset_ids:
1088 yield old_changeset_db[changeset_id]
1090 old_changeset_db.close()
1092 def break_cycle(self, cycle):
1093 """Break up one or more changesets in CYCLE to help break the cycle.
1095 CYCLE is a list of Changesets where
1097 cycle[i] depends on cycle[i - 1]
1099 Break up one or more changesets in CYCLE to make progress towards
1100 breaking the cycle. Update self.changeset_graph accordingly.
1102 It is not guaranteed that the cycle will be broken by one call to
1103 this routine, but at least some progress must be made."""
1105 self.processed_changeset_logger.flush()
1106 best_i = None
1107 best_link = None
1108 for i in range(len(cycle)):
1109 # It's OK if this index wraps to -1:
1110 link = ChangesetGraphLink(
1111 cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])
1113 if best_i is None or link < best_link:
1114 best_i = i
1115 best_link = link
1117 if logger.is_on(logger.DEBUG):
1118 logger.debug(
1119 'Breaking cycle %s by breaking node %x' % (
1120 ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
1121 best_link.changeset.id,))
1123 new_changesets = best_link.break_changeset(self.changeset_key_generator)
1125 self.changeset_graph.delete_changeset(best_link.changeset)
1127 for changeset in new_changesets:
1128 self.changeset_graph.add_new_changeset(changeset)
1130 def run(self, run_options, stats_keeper):
1131 logger.quiet("Breaking symbol changeset dependency cycles...")
1133 Ctx()._projects = read_projects(
1134 artifact_manager.get_temp_file(config.PROJECTS)
1135 )
1136 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1137 Ctx()._symbol_db = SymbolDatabase()
1138 Ctx()._cvs_items_db = IndexedCVSItemStore(
1139 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1140 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1141 DB_OPEN_READ)
1143 shutil.copyfile(
1144 artifact_manager.get_temp_file(
1145 config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
1146 artifact_manager.get_temp_file(
1147 config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
1148 cvs_item_to_changeset_id = CVSItemToChangesetTable(
1149 artifact_manager.get_temp_file(
1150 config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
1151 DB_OPEN_WRITE)
1153 changeset_db = ChangesetDatabase(
1154 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
1155 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
1156 DB_OPEN_NEW)
1158 self.changeset_graph = ChangesetGraph(
1159 changeset_db, cvs_item_to_changeset_id
1160 )
1162 max_changeset_id = 0
1163 for changeset in self.get_source_changesets():
1164 changeset_db.store(changeset)
1165 if isinstance(changeset, SymbolChangeset):
1166 self.changeset_graph.add_changeset(changeset)
1167 max_changeset_id = max(max_changeset_id, changeset.id)
1169 self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
1171 self.processed_changeset_logger = ProcessedChangesetLogger()
1173 # Consume the graph, breaking cycles using self.break_cycle():
1174 for (changeset, time_range) in self.changeset_graph.consume_graph(
1175 cycle_breaker=self.break_cycle
1176 ):
1177 self.processed_changeset_logger.log(changeset.id)
1179 self.processed_changeset_logger.flush()
1180 del self.processed_changeset_logger
1182 self.changeset_graph.close()
1183 self.changeset_graph = None
1184 Ctx()._cvs_items_db.close()
1185 Ctx()._symbol_db.close()
1186 Ctx()._cvs_path_db.close()
1188 logger.quiet("Done")
1191 class BreakAllChangesetCyclesPass(Pass):
1192 """Break up any dependency cycles that are closed by SymbolChangesets."""
1194 def register_artifacts(self):
1195 self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
1196 self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
1197 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
1198 self._register_temp_file_needed(config.PROJECTS)
1199 self._register_temp_file_needed(config.SYMBOL_DB)
1200 self._register_temp_file_needed(config.CVS_PATHS_DB)
1201 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1202 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1203 self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
1204 self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
1205 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
1207 def get_source_changesets(self):
1208 old_changeset_db = ChangesetDatabase(
1209 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
1210 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
1211 DB_OPEN_READ)
1213 changeset_ids = old_changeset_db.keys()
1215 for changeset_id in changeset_ids:
1216 yield old_changeset_db[changeset_id]
1218 old_changeset_db.close()
1220 def _split_retrograde_changeset(self, changeset):
1221 """CHANGESET is retrograde. Split it into non-retrograde changesets."""
1223 logger.debug('Breaking retrograde changeset %x' % (changeset.id,))
1225 self.changeset_graph.delete_changeset(changeset)
1227 # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
1228 ordinal_limits = {}
1229 for cvs_branch in changeset.iter_cvs_items():
1230 max_pred_ordinal = 0
1231 min_succ_ordinal = sys.maxint
1233 for pred_id in cvs_branch.get_pred_ids():
1234 pred_ordinal = self.ordinals.get(
1235 self.cvs_item_to_changeset_id[pred_id], 0)
1236 max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)
1238 for succ_id in cvs_branch.get_succ_ids():
1239 succ_ordinal = self.ordinals.get(
1240 self.cvs_item_to_changeset_id[succ_id], sys.maxint)
1241 min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)
1243 assert max_pred_ordinal < min_succ_ordinal
1244 ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)
1246 # Find the earliest successor ordinal:
1247 min_min_succ_ordinal = sys.maxint
1248 for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
1249 min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)
1251 early_item_ids = []
1252 late_item_ids = []
1253 for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
1254 if max_pred_ordinal >= min_min_succ_ordinal:
1255 late_item_ids.append(id)
1256 else:
1257 early_item_ids.append(id)
1259 assert early_item_ids
1260 assert late_item_ids
1262 early_changeset = changeset.create_split_changeset(
1263 self.changeset_key_generator.gen_id(), early_item_ids)
1264 late_changeset = changeset.create_split_changeset(
1265 self.changeset_key_generator.gen_id(), late_item_ids)
1267 self.changeset_graph.add_new_changeset(early_changeset)
1268 self.changeset_graph.add_new_changeset(late_changeset)
1270 early_split = self._split_if_retrograde(early_changeset.id)
1272 # Because of the way we constructed it, the early changeset should
1273 # not have to be split:
1274 assert not early_split
1276 self._split_if_retrograde(late_changeset.id)
1278 def _split_if_retrograde(self, changeset_id):
1279 node = self.changeset_graph[changeset_id]
1280 pred_ordinals = [
1281 self.ordinals[id]
1282 for id in node.pred_ids
1283 if id in self.ordinals
1284 ]
1285 pred_ordinals.sort()
1286 succ_ordinals = [
1287 self.ordinals[id]
1288 for id in node.succ_ids
1289 if id in self.ordinals
1290 ]
1291 succ_ordinals.sort()
1292 if pred_ordinals and succ_ordinals \
1293 and pred_ordinals[-1] >= succ_ordinals[0]:
1294 self._split_retrograde_changeset(self.changeset_db[node.id])
1295 return True
1296 else:
1297 return False
1299 def break_segment(self, segment):
1300 """Break a changeset in SEGMENT[1:-1].
1302 The range SEGMENT[1:-1] is not empty, and all of the changesets in
1303 that range are SymbolChangesets."""
1305 best_i = None
1306 best_link = None
1307 for i in range(1, len(segment) - 1):
1308 link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])
1310 if best_i is None or link < best_link:
1311 best_i = i
1312 best_link = link
1314 if logger.is_on(logger.DEBUG):
1315 logger.debug(
1316 'Breaking segment %s by breaking node %x' % (
1317 ' -> '.join(['%x' % node.id for node in segment]),
1318 best_link.changeset.id,))
1320 new_changesets = best_link.break_changeset(self.changeset_key_generator)
1322 self.changeset_graph.delete_changeset(best_link.changeset)
1324 for changeset in new_changesets:
1325 self.changeset_graph.add_new_changeset(changeset)
1327 def break_cycle(self, cycle):
1328 """Break up one or more SymbolChangesets in CYCLE to help break the cycle.
1330 CYCLE is a list of SymbolChangesets where
1332 cycle[i] depends on cycle[i - 1]
1334 . Break up one or more changesets in CYCLE to make progress
1335 towards breaking the cycle. Update self.changeset_graph
1336 accordingly.
1338 It is not guaranteed that the cycle will be broken by one call to
1339 this routine, but at least some progress must be made."""
1341 if logger.is_on(logger.DEBUG):
1342 logger.debug(
1343 'Breaking cycle %s' % (
1344 ' -> '.join(['%x' % changeset.id
1345 for changeset in cycle + [cycle[0]]]),))
1347 # Unwrap the cycle into a segment then break the segment:
1348 self.break_segment([cycle[-1]] + cycle + [cycle[0]])
1350 def run(self, run_options, stats_keeper):
1351 logger.quiet("Breaking CVSSymbol dependency loops...")
1353 Ctx()._projects = read_projects(
1354 artifact_manager.get_temp_file(config.PROJECTS)
1355 )
1356 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1357 Ctx()._symbol_db = SymbolDatabase()
1358 Ctx()._cvs_items_db = IndexedCVSItemStore(
1359 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1360 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1361 DB_OPEN_READ)
1363 shutil.copyfile(
1364 artifact_manager.get_temp_file(
1365 config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
1366 artifact_manager.get_temp_file(
1367 config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
1368 self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
1369 artifact_manager.get_temp_file(
1370 config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
1371 DB_OPEN_WRITE)
1373 self.changeset_db = ChangesetDatabase(
1374 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
1375 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
1376 DB_OPEN_NEW)
1378 self.changeset_graph = ChangesetGraph(
1379 self.changeset_db, self.cvs_item_to_changeset_id
1380 )
1382 # A map {changeset_id : ordinal} for OrderedChangesets:
1383 self.ordinals = {}
1384 # A map {ordinal : changeset_id}:
1385 ordered_changeset_map = {}
1386 # A list of all BranchChangeset ids:
1387 branch_changeset_ids = []
1388 max_changeset_id = 0
1389 for changeset in self.get_source_changesets():
1390 self.changeset_db.store(changeset)
1391 self.changeset_graph.add_changeset(changeset)
1392 if isinstance(changeset, OrderedChangeset):
1393 ordered_changeset_map[changeset.ordinal] = changeset.id
1394 self.ordinals[changeset.id] = changeset.ordinal
1395 elif isinstance(changeset, BranchChangeset):
1396 branch_changeset_ids.append(changeset.id)
1397 max_changeset_id = max(max_changeset_id, changeset.id)
1399 # An array of ordered_changeset ids, indexed by ordinal:
1400 ordered_changesets = []
1401 for ordinal in range(len(ordered_changeset_map)):
1402 id = ordered_changeset_map[ordinal]
1403 ordered_changesets.append(id)
1405 ordered_changeset_ids = set(ordered_changeset_map.values())
1406 del ordered_changeset_map
1408 self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
1410 # First we scan through all BranchChangesets looking for
1411 # changesets that are individually "retrograde" and splitting
1412 # those up:
1413 for changeset_id in branch_changeset_ids:
1414 self._split_if_retrograde(changeset_id)
1416 del self.ordinals
1418 next_ordered_changeset = 0
1420 self.processed_changeset_logger = ProcessedChangesetLogger()
1422 while self.changeset_graph:
1423 # Consume any nodes that don't have predecessors:
1424 for (changeset, time_range) \
1425 in self.changeset_graph.consume_nopred_nodes():
1426 self.processed_changeset_logger.log(changeset.id)
1427 if changeset.id in ordered_changeset_ids:
1428 next_ordered_changeset += 1
1429 ordered_changeset_ids.remove(changeset.id)
1431 self.processed_changeset_logger.flush()
1433 if not self.changeset_graph:
1434 break
1436 # Now work on the next ordered changeset that has not yet been
1437 # processed. BreakSymbolChangesetCyclesPass has broken any
1438 # cycles involving only SymbolChangesets, so the presence of a
1439 # cycle implies that there is at least one ordered changeset
1440 # left in the graph:
1441 assert next_ordered_changeset < len(ordered_changesets)
1443 id = ordered_changesets[next_ordered_changeset]
1444 path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
1445 if path:
1446 if logger.is_on(logger.DEBUG):
1447 logger.debug('Breaking path from %s to %s' % (path[0], path[-1],))
1448 self.break_segment(path)
1449 else:
1450 # There were no ordered changesets among the reachable
1451 # predecessors, so do generic cycle-breaking:
1452 if logger.is_on(logger.DEBUG):
1453 logger.debug(
1454 'Breaking generic cycle found from %s'
1455 % (self.changeset_db[id],)
1456 )
1457 self.break_cycle(self.changeset_graph.find_cycle(id))
1459 del self.processed_changeset_logger
1460 self.changeset_graph.close()
1461 self.changeset_graph = None
1462 self.cvs_item_to_changeset_id = None
1463 self.changeset_db = None
1465 logger.quiet("Done")
1468 class TopologicalSortPass(Pass):
1469 """Sort changesets into commit order."""
1471 def register_artifacts(self):
1472 self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
1473 self._register_temp_file_needed(config.PROJECTS)
1474 self._register_temp_file_needed(config.SYMBOL_DB)
1475 self._register_temp_file_needed(config.CVS_PATHS_DB)
1476 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1477 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1478 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
1479 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
1480 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
1482 def get_source_changesets(self, changeset_db):
1483 for changeset_id in changeset_db.keys():
1484 yield changeset_db[changeset_id]
1486 def get_changesets(self):
1487 """Generate (changeset, timestamp) pairs in commit order."""
1489 changeset_db = ChangesetDatabase(
1490 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
1491 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
1492 DB_OPEN_READ)
1494 changeset_graph = ChangesetGraph(
1495 changeset_db,
1496 CVSItemToChangesetTable(
1497 artifact_manager.get_temp_file(
1498 config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
1499 ),
1500 DB_OPEN_READ,
1501 ),
1502 )
1503 symbol_changeset_ids = set()
1505 for changeset in self.get_source_changesets(changeset_db):
1506 changeset_graph.add_changeset(changeset)
1507 if isinstance(changeset, SymbolChangeset):
1508 symbol_changeset_ids.add(changeset.id)
1510 # Ensure a monotonically-increasing timestamp series by keeping
1511 # track of the previous timestamp and ensuring that the following
1512 # one is larger.
1513 timestamper = Timestamper()
1515 for (changeset, time_range) in changeset_graph.consume_graph():
1516 timestamp = timestamper.get(
1517 time_range.t_max, changeset.id in symbol_changeset_ids
1518 )
1519 yield (changeset, timestamp)
1521 changeset_graph.close()
1523 def run(self, run_options, stats_keeper):
1524 logger.quiet("Generating CVSRevisions in commit order...")
1526 Ctx()._projects = read_projects(
1527 artifact_manager.get_temp_file(config.PROJECTS)
1528 )
1529 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1530 Ctx()._symbol_db = SymbolDatabase()
1531 Ctx()._cvs_items_db = IndexedCVSItemStore(
1532 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1533 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1534 DB_OPEN_READ)
1536 sorted_changesets = open(
1537 artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
1538 'w')
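# Each record written below is the changeset id followed by the adjusted
# commit timestamp, both in hexadecimal ('%x %08x'); CreateRevsPass later
# re-reads this file and parses both fields with int(s, 16).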
1540 for (changeset, timestamp) in self.get_changesets():
1541 sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))
1543 sorted_changesets.close()
1545 Ctx()._cvs_items_db.close()
1546 Ctx()._symbol_db.close()
1547 Ctx()._cvs_path_db.close()
1549 logger.quiet("Done")
1552 class CreateRevsPass(Pass):
1553 """Generate the SVNCommit <-> CVSRevision mapping databases.
1555 SVNCommitCreator also calls SymbolingsLogger to register
1556 CVSRevisions that represent an opening or closing for a path on a
1557 branch or tag. See SymbolingsLogger for more details.
1559 This pass was formerly known as pass5."""
1561 def register_artifacts(self):
1562 self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
1563 self._register_temp_file(config.SVN_COMMITS_STORE)
1564 self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
1565 self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
1566 self._register_temp_file_needed(config.PROJECTS)
1567 self._register_temp_file_needed(config.CVS_PATHS_DB)
1568 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1569 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1570 self._register_temp_file_needed(config.SYMBOL_DB)
1571 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
1572 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
1573 self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)
1575 def get_changesets(self):
1576 """Generate (changeset,timestamp,) tuples in commit order."""
1578 changeset_db = ChangesetDatabase(
1579 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
1580 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
1581 DB_OPEN_READ)
1583 for line in file(
1584 artifact_manager.get_temp_file(
1585 config.CHANGESETS_SORTED_DATAFILE)):
1586 [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
1587 yield (changeset_db[changeset_id], timestamp)
1589 changeset_db.close()
1591 def get_svn_commits(self, creator):
1592 """Generate the SVNCommits, in order."""
1594 for (changeset, timestamp) in self.get_changesets():
1595 for svn_commit in creator.process_changeset(changeset, timestamp):
1596 yield svn_commit
1598 def log_svn_commit(self, svn_commit):
1599 """Output information about SVN_COMMIT."""
1601 logger.normal(
1602 'Creating Subversion r%d (%s)'
1603 % (svn_commit.revnum, svn_commit.get_description(),)
1604 )
1606 if isinstance(svn_commit, SVNRevisionCommit):
1607 for cvs_rev in svn_commit.cvs_revs:
1608 logger.verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))
1610 def run(self, run_options, stats_keeper):
1611 logger.quiet("Mapping CVS revisions to Subversion commits...")
1613 Ctx()._projects = read_projects(
1614 artifact_manager.get_temp_file(config.PROJECTS)
1615 )
1616 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1617 Ctx()._symbol_db = SymbolDatabase()
1618 Ctx()._cvs_items_db = IndexedCVSItemStore(
1619 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1620 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1621 DB_OPEN_READ)
1623 Ctx()._symbolings_logger = SymbolingsLogger()
1625 persistence_manager = PersistenceManager(DB_OPEN_NEW)
1627 creator = SVNCommitCreator()
1628 for svn_commit in self.get_svn_commits(creator):
1629 self.log_svn_commit(svn_commit)
1630 persistence_manager.put_svn_commit(svn_commit)
1632 stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
1633 del creator
1635 persistence_manager.close()
1636 Ctx()._symbolings_logger.close()
1637 Ctx()._cvs_items_db.close()
1638 Ctx()._symbol_db.close()
1639 Ctx()._cvs_path_db.close()
1641 logger.quiet("Done")
1644 class SortSymbolOpeningsClosingsPass(Pass):
1645 """This pass was formerly known as pass6."""
1647 def register_artifacts(self):
1648 self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
1649 self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)
1651 def run(self, run_options, stats_keeper):
1652 logger.quiet("Sorting symbolic name source revisions...")
1654 def sort_key(line):
1655 line = line.split(' ', 2)
1656 return (int(line[0], 16), int(line[1]), line[2],)
1658 sort_file(
1659 artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
1660 artifact_manager.get_temp_file(
1661 config.SYMBOL_OPENINGS_CLOSINGS_SORTED
1662 ),
1663 key=sort_key,
1664 tempdirs=[Ctx().tmpdir],
1665 )
1666 logger.quiet("Done")
1669 class IndexSymbolsPass(Pass):
1670 """This pass was formerly known as pass7."""
1672 def register_artifacts(self):
1673 self._register_temp_file(config.SYMBOL_OFFSETS_DB)
1674 self._register_temp_file_needed(config.PROJECTS)
1675 self._register_temp_file_needed(config.SYMBOL_DB)
1676 self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
1678 def generate_offsets_for_symbolings(self):
1679 """This function iterates through all the lines in
1680 SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
1681 SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
1682 where SYMBOLIC_NAME is first encountered. This will allow us to
1683 seek to the various offsets in the file and sequentially read only
1684 the openings and closings that we need."""
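# Each line of the sorted file starts with the symbol id in hexadecimal
# followed by the SVN revision number (a hypothetical line might begin
# '1b 17 ...'); the loop below records the byte offset of the first line
# seen for each id, so offsets[0x1b] would point at that line.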
1686 offsets = {}
1688 f = open(
1689 artifact_manager.get_temp_file(
1690 config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
1691 'r')
1692 old_id = None
1693 while True:
1694 fpos = f.tell()
1695 line = f.readline()
1696 if not line:
1697 break
1698 id, svn_revnum, ignored = line.split(" ", 2)
1699 id = int(id, 16)
1700 if id != old_id:
1701 logger.verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
1702 old_id = id
1703 offsets[id] = fpos
1705 f.close()
1707 offsets_db = file(
1708 artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
1709 cPickle.dump(offsets, offsets_db, -1)
1710 offsets_db.close()
1712 def run(self, run_options, stats_keeper):
1713 logger.quiet("Determining offsets for all symbolic names...")
1714 Ctx()._projects = read_projects(
1715 artifact_manager.get_temp_file(config.PROJECTS)
1716 )
1717 Ctx()._symbol_db = SymbolDatabase()
1718 self.generate_offsets_for_symbolings()
1719 Ctx()._symbol_db.close()
1720 logger.quiet("Done.")
1723 class OutputPass(Pass):
1724 """This pass was formerly known as pass8."""
1726 def register_artifacts(self):
1727 self._register_temp_file_needed(config.PROJECTS)
1728 self._register_temp_file_needed(config.CVS_PATHS_DB)
1729 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1730 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1731 self._register_temp_file_needed(config.SYMBOL_DB)
1732 self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
1733 self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
1734 self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
1735 self._register_temp_file_needed(config.SVN_COMMITS_STORE)
1736 self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
1737 Ctx().output_option.register_artifacts(self)
1739 def run(self, run_options, stats_keeper):
1740 Ctx()._projects = read_projects(
1741 artifact_manager.get_temp_file(config.PROJECTS)
1742 )
1743 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1744 Ctx()._metadata_db = MetadataDatabase(
1745 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
1746 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
1747 DB_OPEN_READ,
1748 )
1749 Ctx()._cvs_items_db = IndexedCVSItemStore(
1750 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1751 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1752 DB_OPEN_READ)
1753 Ctx()._symbol_db = SymbolDatabase()
1754 Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)
1756 Ctx().output_option.setup(stats_keeper.svn_rev_count())
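# The loop below replays the SVN commits in revision-number order, starting
# at r1 and stopping as soon as PersistenceManager.get_svn_commit() returns
# a false value for the next revision number.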
1758 svn_revnum = 1
1759 svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
1760 while svn_commit:
1761 svn_commit.output(Ctx().output_option)
1762 svn_revnum += 1
1763 svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
1765 Ctx().output_option.cleanup()
1766 Ctx()._persistence_manager.close()
1768 Ctx()._symbol_db.close()
1769 Ctx()._cvs_items_db.close()
1770 Ctx()._metadata_db.close()
1771 Ctx()._cvs_path_db.close()
1774 # The list of passes constituting a run of cvs2svn:
1775 passes = [
1776 CollectRevsPass(),
1777 CleanMetadataPass(),
1778 CollateSymbolsPass(),
1779 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1780 FilterSymbolsPass(),
1781 SortRevisionsPass(),
1782 SortSymbolsPass(),
1783 InitializeChangesetsPass(),
1784 #CheckIndexedItemStoreDependenciesPass(
1785 # config.CVS_ITEMS_SORTED_STORE,
1786 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1787 BreakRevisionChangesetCyclesPass(),
1788 RevisionTopologicalSortPass(),
1789 BreakSymbolChangesetCyclesPass(),
1790 BreakAllChangesetCyclesPass(),
1791 TopologicalSortPass(),
1792 CreateRevsPass(),
1793 SortSymbolOpeningsClosingsPass(),
1794 IndexSymbolsPass(),
1795 OutputPass(),
1796 ]