sys.modules['bsddb3'] is already in the local namespace.
[cvs2svn.git] / cvs2svn_lib / passes.py
blob3c5a097fb83e9c2019aa8bd6c6bc0b1be2b9485d
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
20 import sys
21 import shutil
22 import cPickle
24 from cvs2svn_lib import config
25 from cvs2svn_lib.context import Ctx
26 from cvs2svn_lib.common import warning_prefix
27 from cvs2svn_lib.common import FatalException
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import DB_OPEN_NEW
31 from cvs2svn_lib.common import DB_OPEN_READ
32 from cvs2svn_lib.common import DB_OPEN_WRITE
33 from cvs2svn_lib.common import Timestamper
34 from cvs2svn_lib.sort import sort_file
35 from cvs2svn_lib.log import logger
36 from cvs2svn_lib.pass_manager import Pass
37 from cvs2svn_lib.serializer import PrimedPickleSerializer
38 from cvs2svn_lib.artifact_manager import artifact_manager
39 from cvs2svn_lib.cvs_path_database import CVSPathDatabase
40 from cvs2svn_lib.metadata_database import MetadataDatabase
41 from cvs2svn_lib.project import read_projects
42 from cvs2svn_lib.project import write_projects
43 from cvs2svn_lib.symbol import LineOfDevelopment
44 from cvs2svn_lib.symbol import Trunk
45 from cvs2svn_lib.symbol import Symbol
46 from cvs2svn_lib.symbol import Branch
47 from cvs2svn_lib.symbol import Tag
48 from cvs2svn_lib.symbol import ExcludedSymbol
49 from cvs2svn_lib.symbol_database import SymbolDatabase
50 from cvs2svn_lib.symbol_database import create_symbol_database
51 from cvs2svn_lib.symbol_statistics import SymbolPlanError
52 from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
53 from cvs2svn_lib.symbol_statistics import SymbolStatistics
54 from cvs2svn_lib.cvs_item import CVSRevision
55 from cvs2svn_lib.cvs_item import CVSSymbol
56 from cvs2svn_lib.cvs_item_database import OldCVSItemStore
57 from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
58 from cvs2svn_lib.cvs_item_database import cvs_item_primer
59 from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib.key_generator import KeyGenerator
64 from cvs2svn_lib.changeset import RevisionChangeset
65 from cvs2svn_lib.changeset import OrderedChangeset
66 from cvs2svn_lib.changeset import SymbolChangeset
67 from cvs2svn_lib.changeset import BranchChangeset
68 from cvs2svn_lib.changeset import create_symbol_changeset
69 from cvs2svn_lib.changeset_graph import ChangesetGraph
70 from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
71 from cvs2svn_lib.changeset_database import ChangesetDatabase
72 from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
73 from cvs2svn_lib.svn_commit import SVNRevisionCommit
74 from cvs2svn_lib.openings_closings import SymbolingsLogger
75 from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
76 from cvs2svn_lib.persistence_manager import PersistenceManager
77 from cvs2svn_lib.repository_walker import walk_repository
78 from cvs2svn_lib.collect_data import CollectData
79 from cvs2svn_lib.check_dependencies_pass \
80 import CheckItemStoreDependenciesPass
81 from cvs2svn_lib.check_dependencies_pass \
82 import CheckIndexedItemStoreDependenciesPass
class CollectRevsPass(Pass):
  """This pass was formerly known as pass1."""

  def register_artifacts(self):
    # Everything produced here is consumed by later passes:
    for temp_file in [
        config.PROJECTS,
        config.SYMBOL_STATISTICS,
        config.METADATA_INDEX_TABLE,
        config.METADATA_STORE,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_STORE,
        ]:
      self._register_temp_file(temp_file)

  def run(self, run_options, stats_keeper):
    """Walk every project's repository, recording files, symbols, and items.

    Any fatal errors found while scanning are accumulated and reported
    together at the end of the pass."""

    logger.quiet("Examining all CVS ',v' files...")
    Ctx()._projects = {}
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_NEW)
    collect_data = CollectData(stats_keeper)

    # Key generator for CVSFiles:
    file_key_generator = KeyGenerator()

    for project in run_options.projects:
      Ctx()._projects[project.id] = project
      collect_data.process_project(
          project,
          walk_repository(
              project, file_key_generator, collect_data.record_fatal_error
              ),
          )
    run_options.projects = None

    fatal_errors = collect_data.close()

    if fatal_errors:
      # Report every accumulated error in a single exception:
      raise FatalException("\n".join(
          ["Pass 1 complete.", "=" * 75, "Error summary:"]
          + fatal_errors
          + ["Exited due to fatal error(s)."]
          ))

    Ctx()._cvs_path_db.close()
    write_projects(artifact_manager.get_temp_file(config.PROJECTS))
    logger.quiet("Done")
127 class CleanMetadataPass(Pass):
128 """Clean up CVS revision metadata and write it to a new database."""
130 def register_artifacts(self):
131 self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
132 self._register_temp_file(config.METADATA_CLEAN_STORE)
133 self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
134 self._register_temp_file_needed(config.METADATA_STORE)
136 def _get_clean_author(self, author):
137 """Return AUTHOR, converted appropriately to UTF8.
139 Raise a UnicodeException if it cannot be converted using the
140 configured cvs_author_decoder."""
142 try:
143 return self._authors[author]
144 except KeyError:
145 pass
147 try:
148 clean_author = Ctx().cvs_author_decoder(author)
149 except UnicodeError:
150 self._authors[author] = author
151 raise UnicodeError('Problem decoding author \'%s\'' % (author,))
153 try:
154 clean_author = clean_author.encode('utf8')
155 except UnicodeError:
156 self._authors[author] = author
157 raise UnicodeError('Problem encoding author \'%s\'' % (author,))
159 self._authors[author] = clean_author
160 return clean_author
162 def _get_clean_log_msg(self, log_msg):
163 """Return LOG_MSG, converted appropriately to UTF8.
165 Raise a UnicodeException if it cannot be converted using the
166 configured cvs_log_decoder."""
168 try:
169 clean_log_msg = Ctx().cvs_log_decoder(log_msg)
170 except UnicodeError:
171 raise UnicodeError(
172 'Problem decoding log message:\n'
173 '%s\n'
174 '%s\n'
175 '%s'
176 % ('-' * 75, log_msg, '-' * 75,)
179 try:
180 return clean_log_msg.encode('utf8')
181 except UnicodeError:
182 raise UnicodeError(
183 'Problem encoding log message:\n'
184 '%s\n'
185 '%s\n'
186 '%s'
187 % ('-' * 75, log_msg, '-' * 75,)
190 def _clean_metadata(self, metadata):
191 """Clean up METADATA by overwriting its members as necessary."""
193 try:
194 metadata.author = self._get_clean_author(metadata.author)
195 except UnicodeError, e:
196 logger.warn('%s: %s' % (warning_prefix, e,))
197 self.warnings = True
199 try:
200 metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
201 except UnicodeError, e:
202 logger.warn('%s: %s' % (warning_prefix, e,))
203 self.warnings = True
205 def run(self, run_options, stats_keeper):
206 logger.quiet("Converting metadata to UTF8...")
207 metadata_db = MetadataDatabase(
208 artifact_manager.get_temp_file(config.METADATA_STORE),
209 artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
210 DB_OPEN_READ,
212 metadata_clean_db = MetadataDatabase(
213 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
214 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
215 DB_OPEN_NEW,
218 self.warnings = False
220 # A map {author : clean_author} for those known (to avoid
221 # repeating warnings):
222 self._authors = {}
224 for id in metadata_db.iterkeys():
225 metadata = metadata_db[id]
227 # Record the original author name because it might be needed for
228 # expanding CVS keywords:
229 metadata.original_author = metadata.author
231 self._clean_metadata(metadata)
233 metadata_clean_db[id] = metadata
235 if self.warnings:
236 raise FatalError(
237 'There were warnings converting author names and/or log messages\n'
238 'to Unicode (see messages above). Please restart this pass\n'
239 'with one or more \'--encoding\' parameters or with\n'
240 '\'--fallback-encoding\'.'
243 metadata_clean_db.close()
244 metadata_db.close()
245 logger.quiet("Done")
248 class CollateSymbolsPass(Pass):
249 """Divide symbols into branches, tags, and excludes."""
251 conversion_names = {
252 Trunk : 'trunk',
253 Branch : 'branch',
254 Tag : 'tag',
255 ExcludedSymbol : 'exclude',
256 Symbol : '.',
259 def register_artifacts(self):
260 self._register_temp_file(config.SYMBOL_DB)
261 self._register_temp_file_needed(config.PROJECTS)
262 self._register_temp_file_needed(config.SYMBOL_STATISTICS)
264 def get_symbol(self, run_options, stats):
265 """Use StrategyRules to decide what to do with a symbol.
267 STATS is an instance of symbol_statistics._Stats describing an
268 instance of Symbol or Trunk. To determine how the symbol is to be
269 converted, consult the StrategyRules in the project's
270 symbol_strategy_rules. Each rule is allowed a chance to change
271 the way the symbol will be converted. If the symbol is not a
272 Trunk or TypedSymbol after all rules have run, raise
273 IndeterminateSymbolException."""
275 symbol = stats.lod
276 rules = run_options.project_symbol_strategy_rules[symbol.project.id]
277 for rule in rules:
278 symbol = rule.get_symbol(symbol, stats)
279 assert symbol is not None
281 stats.check_valid(symbol)
283 return symbol
285 def log_symbol_summary(self, stats, symbol):
286 if not self.symbol_info_file:
287 return
289 if isinstance(symbol, Trunk):
290 name = '.trunk.'
291 preferred_parent_name = '.'
292 else:
293 name = stats.lod.name
294 if symbol.preferred_parent_id is None:
295 preferred_parent_name = '.'
296 else:
297 preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
298 if isinstance(preferred_parent, Trunk):
299 preferred_parent_name = '.trunk.'
300 else:
301 preferred_parent_name = preferred_parent.name
303 if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
304 symbol_path = symbol.base_path
305 else:
306 symbol_path = '.'
308 self.symbol_info_file.write(
309 '%-5d %-30s %-10s %s %s\n' % (
310 stats.lod.project.id,
311 name,
312 self.conversion_names[symbol.__class__],
313 symbol_path,
314 preferred_parent_name,
317 self.symbol_info_file.write(' # %s\n' % (stats,))
318 parent_counts = stats.possible_parents.items()
319 if parent_counts:
320 self.symbol_info_file.write(' # Possible parents:\n')
321 parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
322 for (pp, count) in parent_counts:
323 if isinstance(pp, Trunk):
324 self.symbol_info_file.write(
325 ' # .trunk. : %d\n' % (count,)
327 else:
328 self.symbol_info_file.write(
329 ' # %s : %d\n' % (pp.name, count,)
332 def get_symbols(self, run_options):
333 """Return a map telling how to convert symbols.
335 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
336 indicating how each symbol should be converted. Trunk objects in
337 SYMBOL_STATS are passed through unchanged. One object is included
338 in the return value for each line of development described in
339 SYMBOL_STATS.
341 Raise FatalError if there was an error."""
343 errors = []
344 mismatches = []
346 if Ctx().symbol_info_filename is not None:
347 self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
348 self.symbol_info_file.write(
349 '# Columns: project_id symbol_name conversion symbol_path '
350 'preferred_parent_name\n'
352 else:
353 self.symbol_info_file = None
355 # Initialize each symbol strategy rule a single time, even if it
356 # is used in more than one project. First define a map from
357 # object id to symbol strategy rule:
358 rules = {}
359 for rule_list in run_options.project_symbol_strategy_rules:
360 for rule in rule_list:
361 rules[id(rule)] = rule
363 for rule in rules.itervalues():
364 rule.start(self.symbol_stats)
366 retval = {}
368 for stats in self.symbol_stats:
369 try:
370 symbol = self.get_symbol(run_options, stats)
371 except IndeterminateSymbolException, e:
372 self.log_symbol_summary(stats, stats.lod)
373 mismatches.append(e.stats)
374 except SymbolPlanError, e:
375 self.log_symbol_summary(stats, stats.lod)
376 errors.append(e)
377 else:
378 self.log_symbol_summary(stats, symbol)
379 retval[stats.lod] = symbol
381 for rule in rules.itervalues():
382 rule.finish()
384 if self.symbol_info_file:
385 self.symbol_info_file.close()
387 del self.symbol_info_file
389 if errors or mismatches:
390 s = ['Problems determining how symbols should be converted:\n']
391 for e in errors:
392 s.append('%s\n' % (e,))
393 if mismatches:
394 s.append(
395 'It is not clear how the following symbols '
396 'should be converted.\n'
397 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
398 'and/or\n'
399 '--symbol-default to resolve the ambiguity.\n'
401 for stats in mismatches:
402 s.append(' %s\n' % (stats,))
403 raise FatalError(''.join(s))
404 else:
405 return retval
407 def run(self, run_options, stats_keeper):
408 Ctx()._projects = read_projects(
409 artifact_manager.get_temp_file(config.PROJECTS)
411 self.symbol_stats = SymbolStatistics(
412 artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
415 symbol_map = self.get_symbols(run_options)
417 # Check the symbols for consistency and bail out if there were errors:
418 self.symbol_stats.check_consistency(symbol_map)
420 # Check that the symbols all have SVN paths set and that the paths
421 # are disjoint:
422 Ctx().output_option.check_symbols(symbol_map)
424 for symbol in symbol_map.itervalues():
425 if isinstance(symbol, ExcludedSymbol):
426 self.symbol_stats.exclude_symbol(symbol)
428 create_symbol_database(symbol_map.values())
430 del self.symbol_stats
432 logger.quiet("Done")
class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    self._register_temp_file(config.ITEM_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    # The revision collector may need artifacts of its own:
    Ctx().revision_collector.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))

    # Persist the serializer so that later passes can re-load it:
    cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
    serializer_file = open(
        artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'wb')
    cPickle.dump(cvs_item_serializer, serializer_file, -1)
    serializer_file.close()

    rev_db = NewSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
        cvs_item_serializer,
        )

    symbol_db = NewSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
        cvs_item_serializer,
        )

    revision_collector = Ctx().revision_collector

    logger.quiet("Filtering out excluded symbols and summarizing items...")

    stats_keeper.reset_cvs_rev_info()
    revision_collector.start()

    # Process the cvs items store one file at a time:
    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      logger.verbose(cvs_file_items.cvs_file.rcs_path)

      # Transform the file's items in place; the order of these calls
      # matters:
      cvs_file_items.filter_excluded_symbols()
      cvs_file_items.mutate_symbols()
      cvs_file_items.adjust_parents()
      cvs_file_items.refine_symbols()
      cvs_file_items.determine_revision_properties(
          Ctx().revision_property_setters
          )
      cvs_file_items.record_opened_symbols()
      cvs_file_items.record_closed_symbols()
      cvs_file_items.check_link_consistency()

      # Give the revision collector a chance to collect data about the
      # file:
      revision_collector.process_file(cvs_file_items)

      # Store whatever is left to the new file and update statistics:
      stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
      for cvs_item in cvs_file_items.values():
        stats_keeper.record_cvs_item(cvs_item)

        if isinstance(cvs_item, CVSRevision):
          rev_db.add(cvs_item)
        elif isinstance(cvs_item, CVSSymbol):
          symbol_db.add(cvs_item)

    stats_keeper.set_stats_reflect_exclude(True)

    rev_db.close()
    symbol_db.close()
    revision_collector.finish()
    cvs_item_store.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class SortRevisionsPass(Pass):
  """Sort the revisions file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_DATAFILE)

  def run(self, run_options, stats_keeper):
    """Produce the sorted revision summary datafile."""

    logger.quiet("Sorting CVS revision summaries...")
    unsorted_path = artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE)
    sorted_path = artifact_manager.get_temp_file(
        config.CVS_REVS_SORTED_DATAFILE
        )
    # Keep sort scratch files inside the conversion's temp directory:
    sort_file(unsorted_path, sorted_path, tempdirs=[Ctx().tmpdir])
    logger.quiet("Done")
class SortSymbolsPass(Pass):
  """Sort the symbols file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)

  def run(self, run_options, stats_keeper):
    """Produce the sorted symbol summary datafile."""

    logger.quiet("Sorting CVS symbol summaries...")
    unsorted_path = artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE)
    sorted_path = artifact_manager.get_temp_file(
        config.CVS_SYMBOLS_SORTED_DATAFILE
        )
    # Keep sort scratch files inside the conversion's temp directory:
    sort_file(unsorted_path, sorted_path, tempdirs=[Ctx().tmpdir])
    logger.quiet("Done")
class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
    self._register_temp_file(config.CHANGESETS_STORE)
    self._register_temp_file(config.CHANGESETS_INDEX)
    self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.ITEM_SERIALIZER)
    self._register_temp_file_needed(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_SORTED_DATAFILE)

  def get_revision_changesets(self):
    """Generate revision changesets, one at a time.

    Each time, yield a list of CVSRevisions that might potentially
    constitute a changeset."""

    # Create changesets for CVSRevisions.  A new changeset starts
    # whenever the metadata id changes or whenever there is too large
    # a gap since the previous revision's timestamp:
    old_metadata_id = None
    old_timestamp = None
    changeset_items = []

    rev_source = OldSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_SORTED_DATAFILE),
        self.cvs_item_serializer,
        )

    for cvs_rev in rev_source:
      if (cvs_rev.metadata_id != old_metadata_id
          or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD):
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_metadata_id = cvs_rev.metadata_id
      changeset_items.append(cvs_rev)
      old_timestamp = cvs_rev.timestamp

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  def get_symbol_changesets(self):
    """Generate symbol changesets, one at a time.

    Each time, yield a list of CVSSymbols that might potentially
    constitute a changeset."""

    old_symbol_id = None
    changeset_items = []

    symbol_source = OldSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE),
        self.cvs_item_serializer,
        )

    for cvs_symbol in symbol_source:
      if cvs_symbol.symbol.id != old_symbol_id:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_symbol_id = cvs_symbol.symbol.id
      changeset_items.append(cvs_symbol)

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  @staticmethod
  def compare_items(a, b):
    """Define a deterministic order on CVSRevisions A and B.

    Sort by timestamp, then path, then numeric revision number, then
    id, so that ties never leave the order ambiguous."""

    result = cmp(a.timestamp, b.timestamp)
    if result:
      return result
    result = cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
    if result:
      return result
    result = cmp(
        [int(part) for part in a.rev.split('.')],
        [int(part) for part in b.rev.split('.')],
        )
    if result:
      return result
    return cmp(a.id, b.id)

  def break_internal_dependencies(self, changeset_items):
    """Split up CHANGESET_ITEMS if necessary to break internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could possibly
    belong in a single RevisionChangeset, but there might be internal
    dependencies among the items.  Return a list of lists, where each
    sublist is a list of CVSRevisions and at least one internal
    dependency has been eliminated.  Iff CHANGESET_ITEMS does not have
    to be split, then the return value will contain a single value,
    namely the original value of CHANGESET_ITEMS.  Split
    CHANGESET_ITEMS at most once, even though the resulting changesets
    might themselves have internal dependencies."""

    # We only look for succ dependencies, since by doing so we
    # automatically cover pred dependencies as well.  First create a
    # list of tuples (pred, succ) of id pairs for CVSItems that depend
    # on each other.
    changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
    dependencies = []
    for cvs_item in changeset_items:
      for next_id in cvs_item.get_succ_ids():
        if next_id in changeset_cvs_item_ids:
          # Sanity check: a CVSItem should never depend on itself:
          if next_id == cvs_item.id:
            raise InternalError('Item depends on itself: %s' % (cvs_item,))
          dependencies.append((cvs_item.id, next_id,))

    if not dependencies:
      return [changeset_items]

    # Sort the changeset_items in a defined order (chronological to the
    # extent that the timestamps are correct and unique).
    changeset_items.sort(self.compare_items)
    indexes = {}
    for (i, changeset_item) in enumerate(changeset_items):
      indexes[changeset_item.id] = i

    # breaks[i] will be the number of internal dependencies broken by
    # splitting the changeset after index i.  Mark each dependency's
    # index range with +1/-1, then integrate with a running sum:
    breaks = [0] * len(changeset_items)
    for (pred, succ,) in dependencies:
      pred_index = indexes[pred]
      succ_index = indexes[succ]
      breaks[min(pred_index, succ_index)] += 1
      breaks[max(pred_index, succ_index)] -= 1
    for i in range(1, len(breaks)):
      breaks[i] += breaks[i - 1]

    # Pick the split point that breaks the most dependencies; on a tie,
    # prefer the split with the smallest timestamp gap:
    best_i = None
    best_count = -1
    best_time = 0
    for i in range(0, len(breaks) - 1):
      gap = changeset_items[i + 1].timestamp - changeset_items[i].timestamp
      if breaks[i] > best_count \
             or (breaks[i] == best_count and gap < best_time):
        best_i = i
        best_count = breaks[i]
        best_time = gap

    # Reuse the old changeset.id for the first of the split changesets.
    return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]

  def break_all_internal_dependencies(self, changeset_items):
    """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
    be part of a single changeset.  Break this list into sublists,
    where the CVSRevisions in each sublist are free of mutual
    dependencies."""

    # This method is written non-recursively (using an explicit stack)
    # to avoid any possible problems with recursion depth.
    changesets_to_split = [changeset_items]
    while changesets_to_split:
      fragments = self.break_internal_dependencies(changesets_to_split.pop())
      if len(fragments) == 1:
        yield fragments[0]
      else:
        # The changeset had to be split; see if either of the
        # fragments have to be split.  Reverse so that they are popped
        # off the stack in their original order:
        fragments.reverse()
        changesets_to_split.extend(fragments)

  def get_changesets(self):
    """Generate (Changeset, [CVSItem,...]) for all changesets.

    The Changesets already have their internal dependencies broken.
    The [CVSItem,...] list is the list of CVSItems in the
    corresponding Changeset."""

    for changeset_items in self.get_revision_changesets():
      for split_changeset_items \
              in self.break_all_internal_dependencies(changeset_items):
        yield (
            RevisionChangeset(
                self.changeset_key_generator.gen_id(),
                [cvs_rev.id for cvs_rev in split_changeset_items]
                ),
            split_changeset_items,
            )

    for changeset_items in self.get_symbol_changesets():
      yield (
          create_symbol_changeset(
              self.changeset_key_generator.gen_id(),
              changeset_items[0].symbol,
              [cvs_symbol.id for cvs_symbol in changeset_items]
              ),
          changeset_items,
          )

  def run(self, run_options, stats_keeper):
    logger.quiet("Creating preliminary commit sets...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    # Reload the serializer written by FilterSymbolsPass:
    serializer_file = open(
        artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'rb')
    self.cvs_item_serializer = cPickle.load(serializer_file)
    serializer_file.close()

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_NEW,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        DB_OPEN_NEW,
        )

    self.sorted_cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_NEW)

    self.changeset_key_generator = KeyGenerator()

    for (changeset, changeset_items) in self.get_changesets():
      if logger.is_on(logger.DEBUG):
        logger.debug(repr(changeset))
      changeset_db.store(changeset)
      for cvs_item in changeset_items:
        self.sorted_cvs_items_db.add(cvs_item)
        cvs_item_to_changeset_id[cvs_item.id] = changeset.id

    self.sorted_cvs_items_db.close()
    cvs_item_to_changeset_id.close()
    changeset_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    del self.cvs_item_serializer

    logger.quiet("Done")
class ProcessedChangesetLogger:
  """Accumulate processed changeset ids and emit them as debug output."""

  def __init__(self):
    # Changeset ids that have been log()ed but not yet flush()ed:
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    """Remember CHANGESET_ID, but only if debug output is enabled."""

    if logger.is_on(logger.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    """Emit any pending changeset ids to the debug log, then forget them."""

    if self.processed_changeset_ids:
      hex_ids = ['%x' % cs_id for cs_id in self.processed_changeset_ids]
      logger.debug('Consumed changeset ids %s' % (', '.join(hex_ids),))

      del self.processed_changeset_ids[:]
class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_STORE)
    self._register_temp_file_needed(config.CHANGESETS_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)

  def get_source_changesets(self):
    """Yield every changeset from the previous pass's changeset database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_READ)

    # Snapshot the ids before yielding anything:
    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()
    del old_changeset_db

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Find the weakest link in the cycle:
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    # Replace the chosen changeset by its fragments in the graph:
    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking revision changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the new mapping with a copy of the old one, then open it
    # for in-place modification:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    # Copy all changesets over, but only RevisionChangesets take part
    # in the graph:
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, RevisionChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  Also convert them to OrderedChangesets, without changing their ids."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    changeset_ids = changeset_db.keys()

    for changeset_id in changeset_ids:
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate the changesets from the REVBROKEN databases.

    Non-revision changesets are yielded unchanged; RevisionChangesets
    are topologically sorted and re-emitted as OrderedChangesets that
    record each changeset's ordinal and its predecessor/successor ids."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_READ,
        )

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN
                ),
            DB_OPEN_READ,
            ),
        )

    for changeset in self.get_source_changesets(changeset_db):
      if isinstance(changeset, RevisionChangeset):
        changeset_graph.add_changeset(changeset)
      else:
        yield changeset

    changeset_ids = []

    # Sentry:
    changeset_ids.append(None)

    for (changeset, time_range) in changeset_graph.consume_graph():
      changeset_ids.append(changeset.id)

    # Sentry:
    changeset_ids.append(None)

    # The sentries at both ends let the first and last real changesets
    # report None as their missing neighbor:
    for i in range(1, len(changeset_ids) - 1):
      changeset = changeset_db[changeset_ids[i]]
      yield OrderedChangeset(
          changeset.id, changeset.cvs_item_ids, i - 1,
          changeset_ids[i - 1], changeset_ids[i + 1])

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    """Write all changesets, in commit order, to the REVSORTED store."""

    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    changesets_revordered_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW)

    for changeset in self.get_changesets():
      changesets_revordered_db.store(changeset)

    changesets_revordered_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Generate every changeset from the REVSORTED database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    # Pick the cheapest node in the cycle to break, as judged by
    # ChangesetGraphLink comparison:
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    """Break dependency cycles among SymbolChangesets.

    Analogous to BreakRevisionChangesetCyclesPass.run(), but only
    SymbolChangesets are added to the cycle-breaking graph."""

    logger.quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the SYMBROKEN table from the REVBROKEN one; it is updated in
    # place as changesets are broken up:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Generate every changeset from the SYMBROKEN database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets."""

    logger.debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        # Changesets without an ordinal count as "earliest possible":
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        # Changesets without an ordinal count as "latest possible":
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Partition the items: anything whose predecessors reach past the
    # earliest successor must go into the "late" changeset:
    early_item_ids = []
    late_item_ids = []
    for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(id)
      else:
        early_item_ids.append(id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split up CHANGESET_ID if it is retrograde.

    A changeset is retrograde if one of its predecessors has an
    ordinal at least as large as that of one of its successors.
    Return True iff a split was carried out."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[id]
        for id in node.pred_ids
        if id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[id]
        for id in node.succ_ids
        if id in self.ordinals
        ]
    succ_ordinals.sort()
    if pred_ordinals and succ_ordinals \
           and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    .  Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle.  Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    """Break any remaining cycles that are closed by SymbolChangesets."""

    logger.quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the ALLBROKEN table from the SYMBROKEN one; it is updated in
    # place as changesets are broken up:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      id = ordered_changeset_map[ordinal]
      ordered_changesets.append(id)

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    del self.ordinals

    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
      if path:
        if logger.is_on(logger.DEBUG):
          logger.debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if logger.is_on(logger.DEBUG):
          logger.debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    logger.quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            ),
        )
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    """Write '<changeset-id> <timestamp>' lines (both in hex), in
    commit order, to the CHANGESETS_SORTED_DATAFILE."""

    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    # Each line is '<changeset-id> <timestamp>', both written in hex by
    # TopologicalSortPass.  (open() instead of the deprecated file()
    # builtin.)
    for line in open(
          artifact_manager.get_temp_file(
              config.CHANGESETS_SORTED_DATAFILE)):
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    logger.normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        logger.verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    """Create SVNCommits from the sorted changesets and store them via
    PersistenceManager; record the final Subversion revision count."""

    logger.quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
    del creator

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class SortSymbolOpeningsClosingsPass(Pass):
  """Sort the symbol openings/closings file.

  This pass was formerly known as pass6."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting symbolic name source revisions...")

    def sort_key(line):
      # Each line is '<symbol-id-hex> <svn-revnum> <rest>'; sort
      # numerically on the first two fields, then on the remainder:
      line = line.split(' ', 2)
      return (int(line[0], 16), int(line[1]), line[2],)

    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED
            ),
        key=sort_key,
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class IndexSymbolsPass(Pass):
  """Determine the file offset at which each symbol's records start.

  This pass was formerly known as pass7."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    old_id = None
    while True:
      # Record the offset *before* reading the line so that it points
      # at the start of the first record for each symbol:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
      id, svn_revnum, ignored = line.split(" ", 2)
      id = int(id, 16)
      if id != old_id:
        logger.verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
        old_id = id
        offsets[id] = fpos

    f.close()

    # open() instead of the deprecated file() builtin:
    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    cPickle.dump(offsets, offsets_db, -1)
    offsets_db.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    logger.quiet("Done.")
class OutputPass(Pass):
  """Write the converted repository using Ctx().output_option.

  This pass was formerly known as pass8."""

  def register_artifacts(self):
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    # The output option may need artifacts of its own:
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # Output the SVNCommits in revision-number order until the
    # persistence manager runs out of commits:
    svn_revnum = 1
    svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
    while svn_commit:
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_path_db.close()
1767 # The list of passes constituting a run of cvs2svn:
1768 passes = [
1769 CollectRevsPass(),
1770 CleanMetadataPass(),
1771 CollateSymbolsPass(),
1772 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1773 FilterSymbolsPass(),
1774 SortRevisionsPass(),
1775 SortSymbolsPass(),
1776 InitializeChangesetsPass(),
1777 #CheckIndexedItemStoreDependenciesPass(
1778 # config.CVS_ITEMS_SORTED_STORE,
1779 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1780 BreakRevisionChangesetCyclesPass(),
1781 RevisionTopologicalSortPass(),
1782 BreakSymbolChangesetCyclesPass(),
1783 BreakAllChangesetCyclesPass(),
1784 TopologicalSortPass(),
1785 CreateRevsPass(),
1786 SortSymbolOpeningsClosingsPass(),
1787 IndexSymbolsPass(),
1788 OutputPass(),