InitializeChangesetsPass: Fully initialize breaks array before continuing.
cvs2svn.git: cvs2svn_lib/passes.py (blob 424dab9280eb934ceb0330d66f9d3a67620613dc)
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
20 import sys
21 import shutil
22 import cPickle
24 from cvs2svn_lib import config
25 from cvs2svn_lib.context import Ctx
26 from cvs2svn_lib.common import warning_prefix
27 from cvs2svn_lib.common import FatalException
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import DB_OPEN_NEW
31 from cvs2svn_lib.common import DB_OPEN_READ
32 from cvs2svn_lib.common import DB_OPEN_WRITE
33 from cvs2svn_lib.common import Timestamper
34 from cvs2svn_lib.sort import sort_file
35 from cvs2svn_lib.log import logger
36 from cvs2svn_lib.pass_manager import Pass
37 from cvs2svn_lib.serializer import PrimedPickleSerializer
38 from cvs2svn_lib.artifact_manager import artifact_manager
39 from cvs2svn_lib.cvs_path_database import CVSPathDatabase
40 from cvs2svn_lib.metadata_database import MetadataDatabase
41 from cvs2svn_lib.project import read_projects
42 from cvs2svn_lib.project import write_projects
43 from cvs2svn_lib.symbol import LineOfDevelopment
44 from cvs2svn_lib.symbol import Trunk
45 from cvs2svn_lib.symbol import Symbol
46 from cvs2svn_lib.symbol import Branch
47 from cvs2svn_lib.symbol import Tag
48 from cvs2svn_lib.symbol import ExcludedSymbol
49 from cvs2svn_lib.symbol_database import SymbolDatabase
50 from cvs2svn_lib.symbol_database import create_symbol_database
51 from cvs2svn_lib.symbol_statistics import SymbolPlanError
52 from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
53 from cvs2svn_lib.symbol_statistics import SymbolStatistics
54 from cvs2svn_lib.cvs_item import CVSRevision
55 from cvs2svn_lib.cvs_item import CVSSymbol
56 from cvs2svn_lib.cvs_item_database import OldCVSItemStore
57 from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
58 from cvs2svn_lib.cvs_item_database import cvs_item_primer
59 from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib.key_generator import KeyGenerator
64 from cvs2svn_lib.changeset import RevisionChangeset
65 from cvs2svn_lib.changeset import OrderedChangeset
66 from cvs2svn_lib.changeset import SymbolChangeset
67 from cvs2svn_lib.changeset import BranchChangeset
68 from cvs2svn_lib.changeset import create_symbol_changeset
69 from cvs2svn_lib.changeset_graph import ChangesetGraph
70 from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
71 from cvs2svn_lib.changeset_database import ChangesetDatabase
72 from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
73 from cvs2svn_lib.svn_commit import SVNRevisionCommit
74 from cvs2svn_lib.openings_closings import SymbolingsLogger
75 from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
76 from cvs2svn_lib.persistence_manager import PersistenceManager
77 from cvs2svn_lib.repository_walker import walk_repository
78 from cvs2svn_lib.collect_data import CollectData
79 from cvs2svn_lib.check_dependencies_pass \
80 import CheckItemStoreDependenciesPass
81 from cvs2svn_lib.check_dependencies_pass \
82 import CheckIndexedItemStoreDependenciesPass
85 class CollectRevsPass(Pass):
86 """This pass was formerly known as pass1."""
88 def register_artifacts(self):
89 self._register_temp_file(config.PROJECTS)
90 self._register_temp_file(config.SYMBOL_STATISTICS)
91 self._register_temp_file(config.METADATA_INDEX_TABLE)
92 self._register_temp_file(config.METADATA_STORE)
93 self._register_temp_file(config.CVS_PATHS_DB)
94 self._register_temp_file(config.CVS_ITEMS_STORE)
96 def run(self, run_options, stats_keeper):
97 logger.quiet("Examining all CVS ',v' files...")
98 Ctx()._projects = {}
99 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_NEW)
100 cd = CollectData(stats_keeper)
102 # Key generator for CVSFiles:
103 file_key_generator = KeyGenerator()
105 for project in run_options.projects:
106 Ctx()._projects[project.id] = project
107 cd.process_project(
108 project,
109 walk_repository(project, file_key_generator, cd.record_fatal_error),
111 run_options.projects = None
113 fatal_errors = cd.close()
115 if fatal_errors:
116 raise FatalException("Pass 1 complete.\n"
117 + "=" * 75 + "\n"
118 + "Error summary:\n"
119 + "\n".join(fatal_errors) + "\n"
120 + "Exited due to fatal error(s).")
122 Ctx()._cvs_path_db.close()
123 write_projects(artifact_manager.get_temp_file(config.PROJECTS))
124 logger.quiet("Done")
127 class CleanMetadataPass(Pass):
128 """Clean up CVS revision metadata and write it to a new database."""
130 def register_artifacts(self):
131 self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
132 self._register_temp_file(config.METADATA_CLEAN_STORE)
133 self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
134 self._register_temp_file_needed(config.METADATA_STORE)
136 def _get_clean_author(self, author):
137 """Return AUTHOR, converted appropriately to UTF8.
139 Raise a UnicodeError if it cannot be converted using the
140 configured cvs_author_decoder."""
142 try:
143 return self._authors[author]
144 except KeyError:
145 pass
147 try:
148 clean_author = Ctx().cvs_author_decoder(author)
149 except UnicodeError:
150 self._authors[author] = author
151 raise UnicodeError('Problem decoding author \'%s\'' % (author,))
153 try:
154 clean_author = clean_author.encode('utf8')
155 except UnicodeError:
156 self._authors[author] = author
157 raise UnicodeError('Problem encoding author \'%s\'' % (author,))
159 self._authors[author] = clean_author
160 return clean_author
162 def _get_clean_log_msg(self, log_msg):
163 """Return LOG_MSG, converted appropriately to UTF8.
165 Raise a UnicodeError if it cannot be converted using the
166 configured cvs_log_decoder."""
168 try:
169 clean_log_msg = Ctx().cvs_log_decoder(log_msg)
170 except UnicodeError:
171 raise UnicodeError(
172 'Problem decoding log message:\n'
173 '%s\n'
174 '%s\n'
175 '%s'
176 % ('-' * 75, log_msg, '-' * 75,)
179 try:
180 return clean_log_msg.encode('utf8')
181 except UnicodeError:
182 raise UnicodeError(
183 'Problem encoding log message:\n'
184 '%s\n'
185 '%s\n'
186 '%s'
187 % ('-' * 75, log_msg, '-' * 75,)
190 def _clean_metadata(self, metadata):
191 """Clean up METADATA by overwriting its members as necessary."""
193 try:
194 metadata.author = self._get_clean_author(metadata.author)
195 except UnicodeError, e:
196 logger.warn('%s: %s' % (warning_prefix, e,))
197 self.warnings = True
199 try:
200 metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
201 except UnicodeError, e:
202 logger.warn('%s: %s' % (warning_prefix, e,))
203 self.warnings = True
205 def run(self, run_options, stats_keeper):
206 logger.quiet("Converting metadata to UTF8...")
207 metadata_db = MetadataDatabase(
208 artifact_manager.get_temp_file(config.METADATA_STORE),
209 artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
210 DB_OPEN_READ,
212 metadata_clean_db = MetadataDatabase(
213 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
214 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
215 DB_OPEN_NEW,
218 self.warnings = False
220 # A map {author : clean_author} for those known (to avoid
221 # repeating warnings):
222 self._authors = {}
224 for id in metadata_db.iterkeys():
225 metadata = metadata_db[id]
227 # Record the original author name because it might be needed for
228 # expanding CVS keywords:
229 metadata.original_author = metadata.author
231 self._clean_metadata(metadata)
233 metadata_clean_db[id] = metadata
235 if self.warnings:
236 raise FatalError(
237 'There were warnings converting author names and/or log messages\n'
238 'to Unicode (see messages above). Please restart this pass\n'
239 'with one or more \'--encoding\' parameters or with\n'
240 '\'--fallback-encoding\'.'
243 metadata_clean_db.close()
244 metadata_db.close()
245 logger.quiet("Done")
248 class CollateSymbolsPass(Pass):
249 """Divide symbols into branches, tags, and excludes."""
251 conversion_names = {
252 Trunk : 'trunk',
253 Branch : 'branch',
254 Tag : 'tag',
255 ExcludedSymbol : 'exclude',
256 Symbol : '.',
259 def register_artifacts(self):
260 self._register_temp_file(config.SYMBOL_DB)
261 self._register_temp_file_needed(config.PROJECTS)
262 self._register_temp_file_needed(config.SYMBOL_STATISTICS)
264 def get_symbol(self, run_options, stats):
265 """Use StrategyRules to decide what to do with a symbol.
267 STATS is an instance of symbol_statistics._Stats describing an
268 instance of Symbol or Trunk. To determine how the symbol is to be
269 converted, consult the StrategyRules in the project's
270 symbol_strategy_rules. Each rule is allowed a chance to change
271 the way the symbol will be converted. If the symbol is not a
272 Trunk or TypedSymbol after all rules have run, raise
273 IndeterminateSymbolException."""
275 symbol = stats.lod
276 rules = run_options.project_symbol_strategy_rules[symbol.project.id]
277 for rule in rules:
278 symbol = rule.get_symbol(symbol, stats)
279 assert symbol is not None
281 stats.check_valid(symbol)
283 return symbol
285 def log_symbol_summary(self, stats, symbol):
286 if not self.symbol_info_file:
287 return
289 if isinstance(symbol, Trunk):
290 name = '.trunk.'
291 preferred_parent_name = '.'
292 else:
293 name = stats.lod.name
294 if symbol.preferred_parent_id is None:
295 preferred_parent_name = '.'
296 else:
297 preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
298 if isinstance(preferred_parent, Trunk):
299 preferred_parent_name = '.trunk.'
300 else:
301 preferred_parent_name = preferred_parent.name
303 if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
304 symbol_path = symbol.base_path
305 else:
306 symbol_path = '.'
308 self.symbol_info_file.write(
309 '%-5d %-30s %-10s %s %s\n' % (
310 stats.lod.project.id,
311 name,
312 self.conversion_names[symbol.__class__],
313 symbol_path,
314 preferred_parent_name,
317 self.symbol_info_file.write(' # %s\n' % (stats,))
318 parent_counts = stats.possible_parents.items()
319 if parent_counts:
320 self.symbol_info_file.write(' # Possible parents:\n')
321 parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
322 for (pp, count) in parent_counts:
323 if isinstance(pp, Trunk):
324 self.symbol_info_file.write(
325 ' # .trunk. : %d\n' % (count,)
327 else:
328 self.symbol_info_file.write(
329 ' # %s : %d\n' % (pp.name, count,)
332 def get_symbols(self, run_options):
333 """Return a map telling how to convert symbols.
335 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
336 indicating how each symbol should be converted. Trunk objects in
337 SYMBOL_STATS are passed through unchanged. One object is included
338 in the return value for each line of development described in
339 SYMBOL_STATS.
341 Raise FatalError if there was an error."""
343 errors = []
344 mismatches = []
346 if Ctx().symbol_info_filename is not None:
347 self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
348 self.symbol_info_file.write(
349 '# Columns: project_id symbol_name conversion symbol_path '
350 'preferred_parent_name\n'
352 else:
353 self.symbol_info_file = None
355 # Initialize each symbol strategy rule a single time, even if it
356 # is used in more than one project. First define a map from
357 # object id to symbol strategy rule:
358 rules = {}
359 for rule_list in run_options.project_symbol_strategy_rules:
360 for rule in rule_list:
361 rules[id(rule)] = rule
363 for rule in rules.itervalues():
364 rule.start(self.symbol_stats)
366 retval = {}
368 for stats in self.symbol_stats:
369 try:
370 symbol = self.get_symbol(run_options, stats)
371 except IndeterminateSymbolException, e:
372 self.log_symbol_summary(stats, stats.lod)
373 mismatches.append(e.stats)
374 except SymbolPlanError, e:
375 self.log_symbol_summary(stats, stats.lod)
376 errors.append(e)
377 else:
378 self.log_symbol_summary(stats, symbol)
379 retval[stats.lod] = symbol
381 for rule in rules.itervalues():
382 rule.finish()
384 if self.symbol_info_file:
385 self.symbol_info_file.close()
387 del self.symbol_info_file
389 if errors or mismatches:
390 s = ['Problems determining how symbols should be converted:\n']
391 for e in errors:
392 s.append('%s\n' % (e,))
393 if mismatches:
394 s.append(
395 'It is not clear how the following symbols '
396 'should be converted.\n'
397 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
398 'and/or\n'
399 '--symbol-default to resolve the ambiguity.\n'
401 for stats in mismatches:
402 s.append(' %s\n' % (stats,))
403 raise FatalError(''.join(s))
404 else:
405 return retval
407 def run(self, run_options, stats_keeper):
408 Ctx()._projects = read_projects(
409 artifact_manager.get_temp_file(config.PROJECTS)
411 self.symbol_stats = SymbolStatistics(
412 artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
415 symbol_map = self.get_symbols(run_options)
417 # Check the symbols for consistency and bail out if there were errors:
418 self.symbol_stats.check_consistency(symbol_map)
420 # Check that the symbols all have SVN paths set and that the paths
421 # are disjoint:
422 Ctx().output_option.check_symbols(symbol_map)
424 for symbol in symbol_map.itervalues():
425 if isinstance(symbol, ExcludedSymbol):
426 self.symbol_stats.exclude_symbol(symbol)
428 create_symbol_database(symbol_map.values())
430 del self.symbol_stats
432 logger.quiet("Done")
435 class FilterSymbolsPass(Pass):
436 """Delete any branches/tags that are to be excluded.
438 Also delete revisions on excluded branches, and delete other
439 references to the excluded symbols."""
441 def register_artifacts(self):
442 self._register_temp_file(config.ITEM_SERIALIZER)
443 self._register_temp_file(config.CVS_REVS_DATAFILE)
444 self._register_temp_file(config.CVS_SYMBOLS_DATAFILE)
445 self._register_temp_file_needed(config.PROJECTS)
446 self._register_temp_file_needed(config.SYMBOL_DB)
447 self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
448 self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
449 self._register_temp_file_needed(config.CVS_PATHS_DB)
450 self._register_temp_file_needed(config.CVS_ITEMS_STORE)
451 Ctx().revision_collector.register_artifacts(self)
453 def run(self, run_options, stats_keeper):
454 Ctx()._projects = read_projects(
455 artifact_manager.get_temp_file(config.PROJECTS)
457 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
458 Ctx()._metadata_db = MetadataDatabase(
459 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
460 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
461 DB_OPEN_READ,
463 Ctx()._symbol_db = SymbolDatabase()
464 cvs_item_store = OldCVSItemStore(
465 artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
467 cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
468 f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'wb')
469 cPickle.dump(cvs_item_serializer, f, -1)
470 f.close()
472 rev_db = NewSortableCVSRevisionDatabase(
473 artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
474 cvs_item_serializer,
477 symbol_db = NewSortableCVSSymbolDatabase(
478 artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
479 cvs_item_serializer,
482 revision_collector = Ctx().revision_collector
484 logger.quiet("Filtering out excluded symbols and summarizing items...")
486 stats_keeper.reset_cvs_rev_info()
487 revision_collector.start()
489 # Process the cvs items store one file at a time:
490 for cvs_file_items in cvs_item_store.iter_cvs_file_items():
491 logger.verbose(cvs_file_items.cvs_file.rcs_path)
492 cvs_file_items.filter_excluded_symbols()
493 cvs_file_items.mutate_symbols()
494 cvs_file_items.adjust_parents()
495 cvs_file_items.refine_symbols()
496 cvs_file_items.determine_revision_properties(
497 Ctx().revision_property_setters
499 cvs_file_items.record_opened_symbols()
500 cvs_file_items.record_closed_symbols()
501 cvs_file_items.check_link_consistency()
503 # Give the revision collector a chance to collect data about the
504 # file:
505 revision_collector.process_file(cvs_file_items)
507 # Store whatever is left to the new file and update statistics:
508 stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
509 for cvs_item in cvs_file_items.values():
510 stats_keeper.record_cvs_item(cvs_item)
512 if isinstance(cvs_item, CVSRevision):
513 rev_db.add(cvs_item)
514 elif isinstance(cvs_item, CVSSymbol):
515 symbol_db.add(cvs_item)
517 stats_keeper.set_stats_reflect_exclude(True)
519 rev_db.close()
520 symbol_db.close()
521 revision_collector.finish()
522 cvs_item_store.close()
523 Ctx()._symbol_db.close()
524 Ctx()._cvs_path_db.close()
526 logger.quiet("Done")
529 class SortRevisionsPass(Pass):
530 """Sort the revisions file."""
532 def register_artifacts(self):
533 self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
534 self._register_temp_file_needed(config.CVS_REVS_DATAFILE)
536 def run(self, run_options, stats_keeper):
537 logger.quiet("Sorting CVS revision summaries...")
538 sort_file(
539 artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
540 artifact_manager.get_temp_file(
541 config.CVS_REVS_SORTED_DATAFILE
543 tempdirs=[Ctx().tmpdir],
545 logger.quiet("Done")
548 class SortSymbolsPass(Pass):
549 """Sort the symbols file."""
551 def register_artifacts(self):
552 self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
553 self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)
555 def run(self, run_options, stats_keeper):
556 logger.quiet("Sorting CVS symbol summaries...")
557 sort_file(
558 artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
559 artifact_manager.get_temp_file(
560 config.CVS_SYMBOLS_SORTED_DATAFILE
562 tempdirs=[Ctx().tmpdir],
564 logger.quiet("Done")
567 class InitializeChangesetsPass(Pass):
568 """Create preliminary CommitSets."""
570 def register_artifacts(self):
571 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
572 self._register_temp_file(config.CHANGESETS_STORE)
573 self._register_temp_file(config.CHANGESETS_INDEX)
574 self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
575 self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
576 self._register_temp_file_needed(config.PROJECTS)
577 self._register_temp_file_needed(config.SYMBOL_DB)
578 self._register_temp_file_needed(config.CVS_PATHS_DB)
579 self._register_temp_file_needed(config.ITEM_SERIALIZER)
580 self._register_temp_file_needed(config.CVS_REVS_SORTED_DATAFILE)
581 self._register_temp_file_needed(
582 config.CVS_SYMBOLS_SORTED_DATAFILE)
584 def get_revision_changesets(self):
585 """Generate revision changesets, one at a time.
587 Each time, yield a list of CVSRevisions that might potentially
588 constitute a changeset."""
590 # Create changesets for CVSRevisions:
591 old_metadata_id = None
592 old_timestamp = None
593 changeset_items = []
595 db = OldSortableCVSRevisionDatabase(
596 artifact_manager.get_temp_file(
597 config.CVS_REVS_SORTED_DATAFILE
599 self.cvs_item_serializer,
602 for cvs_rev in db:
603 if cvs_rev.metadata_id != old_metadata_id \
604 or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
605 # Start a new changeset. First finish up the old changeset,
606 # if any:
607 if changeset_items:
608 yield changeset_items
609 changeset_items = []
610 old_metadata_id = cvs_rev.metadata_id
611 changeset_items.append(cvs_rev)
612 old_timestamp = cvs_rev.timestamp
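# Note that OLD_TIMESTAMP is updated for every revision, so COMMIT_THRESHOLD
# limits the gap between consecutive revisions within a changeset rather than
# the changeset's total time span.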
614 # Finish up the last changeset, if any:
615 if changeset_items:
616 yield changeset_items
618 def get_symbol_changesets(self):
619 """Generate symbol changesets, one at a time.
621 Each time, yield a list of CVSSymbols that might potentially
622 constitute a changeset."""
624 old_symbol_id = None
625 changeset_items = []
627 db = OldSortableCVSSymbolDatabase(
628 artifact_manager.get_temp_file(
629 config.CVS_SYMBOLS_SORTED_DATAFILE
631 self.cvs_item_serializer,
634 for cvs_symbol in db:
635 if cvs_symbol.symbol.id != old_symbol_id:
636 # Start a new changeset. First finish up the old changeset,
637 # if any:
638 if changeset_items:
639 yield changeset_items
640 changeset_items = []
641 old_symbol_id = cvs_symbol.symbol.id
642 changeset_items.append(cvs_symbol)
644 # Finish up the last changeset, if any:
645 if changeset_items:
646 yield changeset_items
648 @staticmethod
649 def compare_items(a, b):
650 return (
651 cmp(a.timestamp, b.timestamp)
652 or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
653 or cmp([int(x) for x in a.rev.split('.')],
654 [int(x) for x in b.rev.split('.')])
655 or cmp(a.id, b.id))
657 def break_internal_dependencies(self, changeset_items):
658 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
660 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
661 belong in a single RevisionChangeset, but there might be internal
662 dependencies among the items. Return a list of lists, where each
663 sublist is a list of CVSRevisions and at least one internal
664 dependency has been eliminated. Iff CHANGESET_ITEMS does not have
665 to be split, then the return value will contain a single value,
666 namely the original value of CHANGESET_ITEMS. Split
667 CHANGESET_ITEMS at most once, even though the resulting changesets
668 might themselves have internal dependencies."""
670 # We only look for succ dependencies, since by doing so we
671 # automatically cover pred dependencies as well. First create a
672 # list of tuples (pred, succ) of id pairs for CVSItems that depend
673 # on each other.
674 dependencies = []
675 changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
676 for cvs_item in changeset_items:
677 for next_id in cvs_item.get_succ_ids():
678 if next_id in changeset_cvs_item_ids:
679 # Sanity check: a CVSItem should never depend on itself:
680 if next_id == cvs_item.id:
681 raise InternalError('Item depends on itself: %s' % (cvs_item,))
683 dependencies.append((cvs_item.id, next_id,))
685 if dependencies:
686 # Sort the changeset_items in a defined order (chronological to the
687 # extent that the timestamps are correct and unique).
688 changeset_items.sort(self.compare_items)
689 indexes = {}
690 for (i, changeset_item) in enumerate(changeset_items):
691 indexes[changeset_item.id] = i
693 # How many internal dependencies would be broken by breaking the
694 # Changeset after a particular index?
695 breaks = [0] * len(changeset_items)
696 for (pred, succ,) in dependencies:
697 pred_index = indexes[pred]
698 succ_index = indexes[succ]
699 breaks[min(pred_index, succ_index)] += 1
700 breaks[max(pred_index, succ_index)] -= 1
701 for i in range(1, len(breaks)):
702 breaks[i] += breaks[i - 1]
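# Illustration (hypothetical data): with four items A, B, C, D at indexes
# 0..3 and dependencies (A, C) and (B, D), the +1/-1 loop leaves
# breaks == [1, 1, -1, -1], and the running sum above turns it into
# [1, 2, 1, 0]; breaks[i] is then the number of internal dependencies that
# would be severed by splitting between items i and i + 1.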
704 best_i = None
705 best_count = -1
706 best_time = 0
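# Choose the boundary that breaks the most internal dependencies; ties are
# resolved in favor of the smallest timestamp gap between the two adjacent
# items: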
707 for i in range(0, len(breaks) - 1):
708 if breaks[i] > best_count:
709 best_i = i
710 best_count = breaks[i]
711 best_time = (changeset_items[i + 1].timestamp
712 - changeset_items[i].timestamp)
713 elif breaks[i] == best_count \
714 and (changeset_items[i + 1].timestamp
715 - changeset_items[i].timestamp) < best_time:
716 best_i = i
717 best_count = breaks[i]
718 best_time = (changeset_items[i + 1].timestamp
719 - changeset_items[i].timestamp)
720 # Reuse the old changeset.id for the first of the split changesets.
721 return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
722 else:
723 return [changeset_items]
725 def break_all_internal_dependencies(self, changeset_items):
726 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
728 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
729 be part of a single changeset. Break this list into sublists,
730 where the CVSRevisions in each sublist are free of mutual
731 dependencies."""
733 # This method is written non-recursively to avoid any possible
734 # problems with recursion depth.
736 changesets_to_split = [changeset_items]
737 while changesets_to_split:
738 changesets = self.break_internal_dependencies(changesets_to_split.pop())
739 if len(changesets) == 1:
740 [changeset_items] = changesets
741 yield changeset_items
742 else:
743 # The changeset had to be split; see if either of the
744 # fragments has to be split:
745 changesets.reverse()
746 changesets_to_split.extend(changesets)
748 def get_changesets(self):
749 """Generate (Changeset, [CVSItem,...]) for all changesets.
751 The Changesets already have their internal dependencies broken.
752 The [CVSItem,...] list is the list of CVSItems in the
753 corresponding Changeset."""
755 for changeset_items in self.get_revision_changesets():
756 for split_changeset_items \
757 in self.break_all_internal_dependencies(changeset_items):
758 yield (
759 RevisionChangeset(
760 self.changeset_key_generator.gen_id(),
761 [cvs_rev.id for cvs_rev in split_changeset_items]
763 split_changeset_items,
766 for changeset_items in self.get_symbol_changesets():
767 yield (
768 create_symbol_changeset(
769 self.changeset_key_generator.gen_id(),
770 changeset_items[0].symbol,
771 [cvs_symbol.id for cvs_symbol in changeset_items]
773 changeset_items,
776 def run(self, run_options, stats_keeper):
777 logger.quiet("Creating preliminary commit sets...")
779 Ctx()._projects = read_projects(
780 artifact_manager.get_temp_file(config.PROJECTS)
782 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
783 Ctx()._symbol_db = SymbolDatabase()
785 f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'rb')
786 self.cvs_item_serializer = cPickle.load(f)
787 f.close()
789 changeset_db = ChangesetDatabase(
790 artifact_manager.get_temp_file(config.CHANGESETS_STORE),
791 artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
792 DB_OPEN_NEW,
794 cvs_item_to_changeset_id = CVSItemToChangesetTable(
795 artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
796 DB_OPEN_NEW,
799 self.sorted_cvs_items_db = IndexedCVSItemStore(
800 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
801 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
802 DB_OPEN_NEW)
804 self.changeset_key_generator = KeyGenerator()
806 for (changeset, changeset_items) in self.get_changesets():
807 if logger.is_on(logger.DEBUG):
808 logger.debug(repr(changeset))
809 changeset_db.store(changeset)
810 for cvs_item in changeset_items:
811 self.sorted_cvs_items_db.add(cvs_item)
812 cvs_item_to_changeset_id[cvs_item.id] = changeset.id
814 self.sorted_cvs_items_db.close()
815 cvs_item_to_changeset_id.close()
816 changeset_db.close()
817 Ctx()._symbol_db.close()
818 Ctx()._cvs_path_db.close()
820 del self.cvs_item_serializer
822 logger.quiet("Done")
825 class ProcessedChangesetLogger:
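"""Accumulate the ids of processed changesets and log them in batched DEBUG messages."""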
826 def __init__(self):
827 self.processed_changeset_ids = []
829 def log(self, changeset_id):
830 if logger.is_on(logger.DEBUG):
831 self.processed_changeset_ids.append(changeset_id)
833 def flush(self):
834 if self.processed_changeset_ids:
835 logger.debug(
836 'Consumed changeset ids %s'
837 % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))
839 del self.processed_changeset_ids[:]
842 class BreakRevisionChangesetCyclesPass(Pass):
843 """Break up any dependency cycles involving only RevisionChangesets."""
845 def register_artifacts(self):
846 self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
847 self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
848 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
849 self._register_temp_file_needed(config.PROJECTS)
850 self._register_temp_file_needed(config.SYMBOL_DB)
851 self._register_temp_file_needed(config.CVS_PATHS_DB)
852 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
853 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
854 self._register_temp_file_needed(config.CHANGESETS_STORE)
855 self._register_temp_file_needed(config.CHANGESETS_INDEX)
856 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)
858 def get_source_changesets(self):
859 old_changeset_db = ChangesetDatabase(
860 artifact_manager.get_temp_file(config.CHANGESETS_STORE),
861 artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
862 DB_OPEN_READ)
864 changeset_ids = old_changeset_db.keys()
866 for changeset_id in changeset_ids:
867 yield old_changeset_db[changeset_id]
869 old_changeset_db.close()
870 del old_changeset_db
872 def break_cycle(self, cycle):
873 """Break up one or more changesets in CYCLE to help break the cycle.
875 CYCLE is a list of Changesets where
877 cycle[i] depends on cycle[i - 1]
879 Break up one or more changesets in CYCLE to make progress towards
880 breaking the cycle. Update self.changeset_graph accordingly.
882 It is not guaranteed that the cycle will be broken by one call to
883 this routine, but at least some progress must be made."""
885 self.processed_changeset_logger.flush()
886 best_i = None
887 best_link = None
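# Evaluate breaking each changeset in the cycle together with its two
# neighbors; the smallest candidate according to ChangesetGraphLink's
# ordering is the one that gets broken: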
888 for i in range(len(cycle)):
889 # It's OK if this index wraps to -1:
890 link = ChangesetGraphLink(
891 cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])
893 if best_i is None or link < best_link:
894 best_i = i
895 best_link = link
897 if logger.is_on(logger.DEBUG):
898 logger.debug(
899 'Breaking cycle %s by breaking node %x' % (
900 ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
901 best_link.changeset.id,))
903 new_changesets = best_link.break_changeset(self.changeset_key_generator)
905 self.changeset_graph.delete_changeset(best_link.changeset)
907 for changeset in new_changesets:
908 self.changeset_graph.add_new_changeset(changeset)
910 def run(self, run_options, stats_keeper):
911 logger.quiet("Breaking revision changeset dependency cycles...")
913 Ctx()._projects = read_projects(
914 artifact_manager.get_temp_file(config.PROJECTS)
916 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
917 Ctx()._symbol_db = SymbolDatabase()
918 Ctx()._cvs_items_db = IndexedCVSItemStore(
919 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
920 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
921 DB_OPEN_READ)
923 shutil.copyfile(
924 artifact_manager.get_temp_file(
925 config.CVS_ITEM_TO_CHANGESET),
926 artifact_manager.get_temp_file(
927 config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
928 cvs_item_to_changeset_id = CVSItemToChangesetTable(
929 artifact_manager.get_temp_file(
930 config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
931 DB_OPEN_WRITE)
933 changeset_db = ChangesetDatabase(
934 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
935 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
936 DB_OPEN_NEW)
938 self.changeset_graph = ChangesetGraph(
939 changeset_db, cvs_item_to_changeset_id
942 max_changeset_id = 0
943 for changeset in self.get_source_changesets():
944 changeset_db.store(changeset)
945 if isinstance(changeset, RevisionChangeset):
946 self.changeset_graph.add_changeset(changeset)
947 max_changeset_id = max(max_changeset_id, changeset.id)
949 self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
951 self.processed_changeset_logger = ProcessedChangesetLogger()
953 # Consume the graph, breaking cycles using self.break_cycle():
954 for (changeset, time_range) in self.changeset_graph.consume_graph(
955 cycle_breaker=self.break_cycle
957 self.processed_changeset_logger.log(changeset.id)
959 self.processed_changeset_logger.flush()
960 del self.processed_changeset_logger
962 self.changeset_graph.close()
963 self.changeset_graph = None
964 Ctx()._cvs_items_db.close()
965 Ctx()._symbol_db.close()
966 Ctx()._cvs_path_db.close()
968 logger.quiet("Done")
971 class RevisionTopologicalSortPass(Pass):
972 """Sort RevisionChangesets into commit order.
974 Also convert them to OrderedChangesets, without changing their ids."""
976 def register_artifacts(self):
977 self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
978 self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
979 self._register_temp_file_needed(config.PROJECTS)
980 self._register_temp_file_needed(config.SYMBOL_DB)
981 self._register_temp_file_needed(config.CVS_PATHS_DB)
982 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
983 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
984 self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
985 self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
986 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
988 def get_source_changesets(self, changeset_db):
989 changeset_ids = changeset_db.keys()
991 for changeset_id in changeset_ids:
992 yield changeset_db[changeset_id]
994 def get_changesets(self):
995 changeset_db = ChangesetDatabase(
996 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
997 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
998 DB_OPEN_READ,
1001 changeset_graph = ChangesetGraph(
1002 changeset_db,
1003 CVSItemToChangesetTable(
1004 artifact_manager.get_temp_file(
1005 config.CVS_ITEM_TO_CHANGESET_REVBROKEN
1007 DB_OPEN_READ,
1011 for changeset in self.get_source_changesets(changeset_db):
1012 if isinstance(changeset, RevisionChangeset):
1013 changeset_graph.add_changeset(changeset)
1014 else:
1015 yield changeset
1017 changeset_ids = []
1019 # Sentry:
1020 changeset_ids.append(None)
1022 for (changeset, time_range) in changeset_graph.consume_graph():
1023 changeset_ids.append(changeset.id)
1025 # Sentry:
1026 changeset_ids.append(None)
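# The None sentries become the prev_id of the first OrderedChangeset and
# the next_id of the last one in the loop below.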
1028 for i in range(1, len(changeset_ids) - 1):
1029 changeset = changeset_db[changeset_ids[i]]
1030 yield OrderedChangeset(
1031 changeset.id, changeset.cvs_item_ids, i - 1,
1032 changeset_ids[i - 1], changeset_ids[i + 1])
1034 changeset_graph.close()
1036 def run(self, run_options, stats_keeper):
1037 logger.quiet("Generating CVSRevisions in commit order...")
1039 Ctx()._projects = read_projects(
1040 artifact_manager.get_temp_file(config.PROJECTS)
1042 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1043 Ctx()._symbol_db = SymbolDatabase()
1044 Ctx()._cvs_items_db = IndexedCVSItemStore(
1045 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1046 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1047 DB_OPEN_READ)
1049 changesets_revordered_db = ChangesetDatabase(
1050 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
1051 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
1052 DB_OPEN_NEW)
1054 for changeset in self.get_changesets():
1055 changesets_revordered_db.store(changeset)
1057 changesets_revordered_db.close()
1058 Ctx()._cvs_items_db.close()
1059 Ctx()._symbol_db.close()
1060 Ctx()._cvs_path_db.close()
1062 logger.quiet("Done")
1065 class BreakSymbolChangesetCyclesPass(Pass):
1066 """Break up any dependency cycles involving only SymbolChangesets."""
1068 def register_artifacts(self):
1069 self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
1070 self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
1071 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
1072 self._register_temp_file_needed(config.PROJECTS)
1073 self._register_temp_file_needed(config.SYMBOL_DB)
1074 self._register_temp_file_needed(config.CVS_PATHS_DB)
1075 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1076 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1077 self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
1078 self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
1079 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
1081 def get_source_changesets(self):
1082 old_changeset_db = ChangesetDatabase(
1083 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
1084 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
1085 DB_OPEN_READ)
1087 changeset_ids = old_changeset_db.keys()
1089 for changeset_id in changeset_ids:
1090 yield old_changeset_db[changeset_id]
1092 old_changeset_db.close()
1094 def break_cycle(self, cycle):
1095 """Break up one or more changesets in CYCLE to help break the cycle.
1097 CYCLE is a list of Changesets where
1099 cycle[i] depends on cycle[i - 1]
1101 Break up one or more changesets in CYCLE to make progress towards
1102 breaking the cycle. Update self.changeset_graph accordingly.
1104 It is not guaranteed that the cycle will be broken by one call to
1105 this routine, but at least some progress must be made."""
1107 self.processed_changeset_logger.flush()
1108 best_i = None
1109 best_link = None
1110 for i in range(len(cycle)):
1111 # It's OK if this index wraps to -1:
1112 link = ChangesetGraphLink(
1113 cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])
1115 if best_i is None or link < best_link:
1116 best_i = i
1117 best_link = link
1119 if logger.is_on(logger.DEBUG):
1120 logger.debug(
1121 'Breaking cycle %s by breaking node %x' % (
1122 ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
1123 best_link.changeset.id,))
1125 new_changesets = best_link.break_changeset(self.changeset_key_generator)
1127 self.changeset_graph.delete_changeset(best_link.changeset)
1129 for changeset in new_changesets:
1130 self.changeset_graph.add_new_changeset(changeset)
1132 def run(self, run_options, stats_keeper):
1133 logger.quiet("Breaking symbol changeset dependency cycles...")
1135 Ctx()._projects = read_projects(
1136 artifact_manager.get_temp_file(config.PROJECTS)
1138 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1139 Ctx()._symbol_db = SymbolDatabase()
1140 Ctx()._cvs_items_db = IndexedCVSItemStore(
1141 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1142 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1143 DB_OPEN_READ)
1145 shutil.copyfile(
1146 artifact_manager.get_temp_file(
1147 config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
1148 artifact_manager.get_temp_file(
1149 config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
1150 cvs_item_to_changeset_id = CVSItemToChangesetTable(
1151 artifact_manager.get_temp_file(
1152 config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
1153 DB_OPEN_WRITE)
1155 changeset_db = ChangesetDatabase(
1156 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
1157 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
1158 DB_OPEN_NEW)
1160 self.changeset_graph = ChangesetGraph(
1161 changeset_db, cvs_item_to_changeset_id
1164 max_changeset_id = 0
1165 for changeset in self.get_source_changesets():
1166 changeset_db.store(changeset)
1167 if isinstance(changeset, SymbolChangeset):
1168 self.changeset_graph.add_changeset(changeset)
1169 max_changeset_id = max(max_changeset_id, changeset.id)
1171 self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
1173 self.processed_changeset_logger = ProcessedChangesetLogger()
1175 # Consume the graph, breaking cycles using self.break_cycle():
1176 for (changeset, time_range) in self.changeset_graph.consume_graph(
1177 cycle_breaker=self.break_cycle
1179 self.processed_changeset_logger.log(changeset.id)
1181 self.processed_changeset_logger.flush()
1182 del self.processed_changeset_logger
1184 self.changeset_graph.close()
1185 self.changeset_graph = None
1186 Ctx()._cvs_items_db.close()
1187 Ctx()._symbol_db.close()
1188 Ctx()._cvs_path_db.close()
1190 logger.quiet("Done")
1193 class BreakAllChangesetCyclesPass(Pass):
1194 """Break up any dependency cycles that are closed by SymbolChangesets."""
1196 def register_artifacts(self):
1197 self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
1198 self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
1199 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
1200 self._register_temp_file_needed(config.PROJECTS)
1201 self._register_temp_file_needed(config.SYMBOL_DB)
1202 self._register_temp_file_needed(config.CVS_PATHS_DB)
1203 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1204 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1205 self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
1206 self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
1207 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
1209 def get_source_changesets(self):
1210 old_changeset_db = ChangesetDatabase(
1211 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
1212 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
1213 DB_OPEN_READ)
1215 changeset_ids = old_changeset_db.keys()
1217 for changeset_id in changeset_ids:
1218 yield old_changeset_db[changeset_id]
1220 old_changeset_db.close()
1222 def _split_retrograde_changeset(self, changeset):
1223 """CHANGESET is retrograde. Split it into non-retrograde changesets."""
1225 logger.debug('Breaking retrograde changeset %x' % (changeset.id,))
1227 self.changeset_graph.delete_changeset(changeset)
1229 # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
1230 ordinal_limits = {}
1231 for cvs_branch in changeset.iter_cvs_items():
1232 max_pred_ordinal = 0
1233 min_succ_ordinal = sys.maxint
1235 for pred_id in cvs_branch.get_pred_ids():
1236 pred_ordinal = self.ordinals.get(
1237 self.cvs_item_to_changeset_id[pred_id], 0)
1238 max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)
1240 for succ_id in cvs_branch.get_succ_ids():
1241 succ_ordinal = self.ordinals.get(
1242 self.cvs_item_to_changeset_id[succ_id], sys.maxint)
1243 min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)
1245 assert max_pred_ordinal < min_succ_ordinal
1246 ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)
1248 # Find the earliest successor ordinal:
1249 min_min_succ_ordinal = sys.maxint
1250 for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
1251 min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)
1253 early_item_ids = []
1254 late_item_ids = []
1255 for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
1256 if max_pred_ordinal >= min_min_succ_ordinal:
1257 late_item_ids.append(id)
1258 else:
1259 early_item_ids.append(id)
1261 assert early_item_ids
1262 assert late_item_ids
1264 early_changeset = changeset.create_split_changeset(
1265 self.changeset_key_generator.gen_id(), early_item_ids)
1266 late_changeset = changeset.create_split_changeset(
1267 self.changeset_key_generator.gen_id(), late_item_ids)
1269 self.changeset_graph.add_new_changeset(early_changeset)
1270 self.changeset_graph.add_new_changeset(late_changeset)
1272 early_split = self._split_if_retrograde(early_changeset.id)
1274 # Because of the way we constructed it, the early changeset should
1275 # not have to be split:
1276 assert not early_split
1278 self._split_if_retrograde(late_changeset.id)
1280 def _split_if_retrograde(self, changeset_id):
1281 node = self.changeset_graph[changeset_id]
1282 pred_ordinals = [
1283 self.ordinals[id]
1284 for id in node.pred_ids
1285 if id in self.ordinals
1287 pred_ordinals.sort()
1288 succ_ordinals = [
1289 self.ordinals[id]
1290 for id in node.succ_ids
1291 if id in self.ordinals
1293 succ_ordinals.sort()
1294 if pred_ordinals and succ_ordinals \
1295 and pred_ordinals[-1] >= succ_ordinals[0]:
1296 self._split_retrograde_changeset(self.changeset_db[node.id])
1297 return True
1298 else:
1299 return False
1301 def break_segment(self, segment):
1302 """Break a changeset in SEGMENT[1:-1].
1304 The range SEGMENT[1:-1] is not empty, and all of the changesets in
1305 that range are SymbolChangesets."""
1307 best_i = None
1308 best_link = None
1309 for i in range(1, len(segment) - 1):
1310 link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])
1312 if best_i is None or link < best_link:
1313 best_i = i
1314 best_link = link
1316 if logger.is_on(logger.DEBUG):
1317 logger.debug(
1318 'Breaking segment %s by breaking node %x' % (
1319 ' -> '.join(['%x' % node.id for node in segment]),
1320 best_link.changeset.id,))
1322 new_changesets = best_link.break_changeset(self.changeset_key_generator)
1324 self.changeset_graph.delete_changeset(best_link.changeset)
1326 for changeset in new_changesets:
1327 self.changeset_graph.add_new_changeset(changeset)
1329 def break_cycle(self, cycle):
1330 """Break up one or more SymbolChangesets in CYCLE to help break the cycle.
1332 CYCLE is a list of SymbolChangesets where
1334 cycle[i] depends on cycle[i - 1]
1336 . Break up one or more changesets in CYCLE to make progress
1337 towards breaking the cycle. Update self.changeset_graph
1338 accordingly.
1340 It is not guaranteed that the cycle will be broken by one call to
1341 this routine, but at least some progress must be made."""
1343 if logger.is_on(logger.DEBUG):
1344 logger.debug(
1345 'Breaking cycle %s' % (
1346 ' -> '.join(['%x' % changeset.id
1347 for changeset in cycle + [cycle[0]]]),))
1349 # Unwrap the cycle into a segment then break the segment:
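# (cycle[-1] and cycle[0] are added at the ends so that every changeset in
# the cycle is an interior element of the segment, and hence a candidate
# for break_segment().)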
1350 self.break_segment([cycle[-1]] + cycle + [cycle[0]])
1352 def run(self, run_options, stats_keeper):
1353 logger.quiet("Breaking CVSSymbol dependency loops...")
1355 Ctx()._projects = read_projects(
1356 artifact_manager.get_temp_file(config.PROJECTS)
1358 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1359 Ctx()._symbol_db = SymbolDatabase()
1360 Ctx()._cvs_items_db = IndexedCVSItemStore(
1361 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1362 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1363 DB_OPEN_READ)
1365 shutil.copyfile(
1366 artifact_manager.get_temp_file(
1367 config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
1368 artifact_manager.get_temp_file(
1369 config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
1370 self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
1371 artifact_manager.get_temp_file(
1372 config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
1373 DB_OPEN_WRITE)
1375 self.changeset_db = ChangesetDatabase(
1376 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
1377 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
1378 DB_OPEN_NEW)
1380 self.changeset_graph = ChangesetGraph(
1381 self.changeset_db, self.cvs_item_to_changeset_id
1384 # A map {changeset_id : ordinal} for OrderedChangesets:
1385 self.ordinals = {}
1386 # A map {ordinal : changeset_id}:
1387 ordered_changeset_map = {}
1388 # A list of all BranchChangeset ids:
1389 branch_changeset_ids = []
1390 max_changeset_id = 0
1391 for changeset in self.get_source_changesets():
1392 self.changeset_db.store(changeset)
1393 self.changeset_graph.add_changeset(changeset)
1394 if isinstance(changeset, OrderedChangeset):
1395 ordered_changeset_map[changeset.ordinal] = changeset.id
1396 self.ordinals[changeset.id] = changeset.ordinal
1397 elif isinstance(changeset, BranchChangeset):
1398 branch_changeset_ids.append(changeset.id)
1399 max_changeset_id = max(max_changeset_id, changeset.id)
1401 # An array of ordered_changeset ids, indexed by ordinal:
1402 ordered_changesets = []
1403 for ordinal in range(len(ordered_changeset_map)):
1404 id = ordered_changeset_map[ordinal]
1405 ordered_changesets.append(id)
1407 ordered_changeset_ids = set(ordered_changeset_map.values())
1408 del ordered_changeset_map
1410 self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
1412 # First we scan through all BranchChangesets looking for
1413 # changesets that are individually "retrograde" and splitting
1414 # those up:
1415 for changeset_id in branch_changeset_ids:
1416 self._split_if_retrograde(changeset_id)
1418 del self.ordinals
1420 next_ordered_changeset = 0
1422 self.processed_changeset_logger = ProcessedChangesetLogger()
1424 while self.changeset_graph:
1425 # Consume any nodes that don't have predecessors:
1426 for (changeset, time_range) \
1427 in self.changeset_graph.consume_nopred_nodes():
1428 self.processed_changeset_logger.log(changeset.id)
1429 if changeset.id in ordered_changeset_ids:
1430 next_ordered_changeset += 1
1431 ordered_changeset_ids.remove(changeset.id)
1433 self.processed_changeset_logger.flush()
1435 if not self.changeset_graph:
1436 break
1438 # Now work on the next ordered changeset that has not yet been
1439 # processed. BreakSymbolChangesetCyclesPass has broken any
1440 # cycles involving only SymbolChangesets, so the presence of a
1441 # cycle implies that there is at least one ordered changeset
1442 # left in the graph:
1443 assert next_ordered_changeset < len(ordered_changesets)
1445 id = ordered_changesets[next_ordered_changeset]
1446 path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
1447 if path:
1448 if logger.is_on(logger.DEBUG):
1449 logger.debug('Breaking path from %s to %s' % (path[0], path[-1],))
1450 self.break_segment(path)
1451 else:
1452 # There were no ordered changesets among the reachable
1453 # predecessors, so do generic cycle-breaking:
1454 if logger.is_on(logger.DEBUG):
1455 logger.debug(
1456 'Breaking generic cycle found from %s'
1457 % (self.changeset_db[id],)
1459 self.break_cycle(self.changeset_graph.find_cycle(id))
1461 del self.processed_changeset_logger
1462 self.changeset_graph.close()
1463 self.changeset_graph = None
1464 self.cvs_item_to_changeset_id = None
1465 self.changeset_db = None
1467 logger.quiet("Done")
1470 class TopologicalSortPass(Pass):
1471 """Sort changesets into commit order."""
1473 def register_artifacts(self):
1474 self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
1475 self._register_temp_file_needed(config.PROJECTS)
1476 self._register_temp_file_needed(config.SYMBOL_DB)
1477 self._register_temp_file_needed(config.CVS_PATHS_DB)
1478 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1479 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1480 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
1481 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
1482 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
1484 def get_source_changesets(self, changeset_db):
1485 for changeset_id in changeset_db.keys():
1486 yield changeset_db[changeset_id]
1488 def get_changesets(self):
1489 """Generate (changeset, timestamp) pairs in commit order."""
1491 changeset_db = ChangesetDatabase(
1492 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
1493 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
1494 DB_OPEN_READ)
1496 changeset_graph = ChangesetGraph(
1497 changeset_db,
1498 CVSItemToChangesetTable(
1499 artifact_manager.get_temp_file(
1500 config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
1502 DB_OPEN_READ,
1505 symbol_changeset_ids = set()
1507 for changeset in self.get_source_changesets(changeset_db):
1508 changeset_graph.add_changeset(changeset)
1509 if isinstance(changeset, SymbolChangeset):
1510 symbol_changeset_ids.add(changeset.id)
1512 # Ensure a monotonically-increasing timestamp series by keeping
1513 # track of the previous timestamp and ensuring that the following
1514 # one is larger.
1515 timestamper = Timestamper()
1517 for (changeset, time_range) in changeset_graph.consume_graph():
1518 timestamp = timestamper.get(
1519 time_range.t_max, changeset.id in symbol_changeset_ids
1521 yield (changeset, timestamp)
1523 changeset_graph.close()
1525 def run(self, run_options, stats_keeper):
1526 logger.quiet("Generating CVSRevisions in commit order...")
1528 Ctx()._projects = read_projects(
1529 artifact_manager.get_temp_file(config.PROJECTS)
1531 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1532 Ctx()._symbol_db = SymbolDatabase()
1533 Ctx()._cvs_items_db = IndexedCVSItemStore(
1534 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1535 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1536 DB_OPEN_READ)
1538 sorted_changesets = open(
1539 artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
1540 'w')
1542 for (changeset, timestamp) in self.get_changesets():
1543 sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))
1545 sorted_changesets.close()
1547 Ctx()._cvs_items_db.close()
1548 Ctx()._symbol_db.close()
1549 Ctx()._cvs_path_db.close()
1551 logger.quiet("Done")
1554 class CreateRevsPass(Pass):
1555 """Generate the SVNCommit <-> CVSRevision mapping databases.
1557 SVNCommitCreator also calls SymbolingsLogger to register
1558 CVSRevisions that represent an opening or closing for a path on a
1559 branch or tag. See SymbolingsLogger for more details.
1561 This pass was formerly known as pass5."""
1563 def register_artifacts(self):
1564 self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
1565 self._register_temp_file(config.SVN_COMMITS_STORE)
1566 self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
1567 self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
1568 self._register_temp_file_needed(config.PROJECTS)
1569 self._register_temp_file_needed(config.CVS_PATHS_DB)
1570 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1571 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1572 self._register_temp_file_needed(config.SYMBOL_DB)
1573 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
1574 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
1575 self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)
1577 def get_changesets(self):
1578 """Generate (changeset,timestamp,) tuples in commit order."""
1580 changeset_db = ChangesetDatabase(
1581 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
1582 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
1583 DB_OPEN_READ)
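# Each line of CHANGESETS_SORTED_DATAFILE is '<changeset_id> <timestamp>',
# both written in hexadecimal by TopologicalSortPass: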
1585 for line in file(
1586 artifact_manager.get_temp_file(
1587 config.CHANGESETS_SORTED_DATAFILE)):
1588 [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
1589 yield (changeset_db[changeset_id], timestamp)
1591 changeset_db.close()
1593 def get_svn_commits(self, creator):
1594 """Generate the SVNCommits, in order."""
1596 for (changeset, timestamp) in self.get_changesets():
1597 for svn_commit in creator.process_changeset(changeset, timestamp):
1598 yield svn_commit
1600 def log_svn_commit(self, svn_commit):
1601 """Output information about SVN_COMMIT."""
1603 logger.normal(
1604 'Creating Subversion r%d (%s)'
1605 % (svn_commit.revnum, svn_commit.get_description(),)
1608 if isinstance(svn_commit, SVNRevisionCommit):
1609 for cvs_rev in svn_commit.cvs_revs:
1610 logger.verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))
1612 def run(self, run_options, stats_keeper):
1613 logger.quiet("Mapping CVS revisions to Subversion commits...")
1615 Ctx()._projects = read_projects(
1616 artifact_manager.get_temp_file(config.PROJECTS)
1618 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1619 Ctx()._symbol_db = SymbolDatabase()
1620 Ctx()._cvs_items_db = IndexedCVSItemStore(
1621 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1622 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1623 DB_OPEN_READ)
1625 Ctx()._symbolings_logger = SymbolingsLogger()
1627 persistence_manager = PersistenceManager(DB_OPEN_NEW)
1629 creator = SVNCommitCreator()
1630 for svn_commit in self.get_svn_commits(creator):
1631 self.log_svn_commit(svn_commit)
1632 persistence_manager.put_svn_commit(svn_commit)
1634 stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
1635 del creator
1637 persistence_manager.close()
1638 Ctx()._symbolings_logger.close()
1639 Ctx()._cvs_items_db.close()
1640 Ctx()._symbol_db.close()
1641 Ctx()._cvs_path_db.close()
1643 logger.quiet("Done")
1646 class SortSymbolOpeningsClosingsPass(Pass):
1647 """This pass was formerly known as pass6."""
1649 def register_artifacts(self):
1650 self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
1651 self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)
1653 def run(self, run_options, stats_keeper):
1654 logger.quiet("Sorting symbolic name source revisions...")
1656 def sort_key(line):
1657 line = line.split(' ', 2)
1658 return (int(line[0], 16), int(line[1]), line[2],)
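# Sort the openings/closings lines numerically by symbol id (a hex field)
# and SVN revision number, then lexically by the rest of the line: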
1660 sort_file(
1661 artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
1662 artifact_manager.get_temp_file(
1663 config.SYMBOL_OPENINGS_CLOSINGS_SORTED
1665 key=sort_key,
1666 tempdirs=[Ctx().tmpdir],
1668 logger.quiet("Done")
1671 class IndexSymbolsPass(Pass):
1672 """This pass was formerly known as pass7."""
1674 def register_artifacts(self):
1675 self._register_temp_file(config.SYMBOL_OFFSETS_DB)
1676 self._register_temp_file_needed(config.PROJECTS)
1677 self._register_temp_file_needed(config.SYMBOL_DB)
1678 self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
1680 def generate_offsets_for_symbolings(self):
1681 """This function iterates through all the lines in
1682 SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
1683 SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
1684 where SYMBOLIC_NAME is first encountered. This will allow us to
1685 seek to the various offsets in the file and sequentially read only
1686 the openings and closings that we need."""
1688 offsets = {}
1690 f = open(
1691 artifact_manager.get_temp_file(
1692 config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
1693 'r')
1694 old_id = None
1695 while True:
1696 fpos = f.tell()
1697 line = f.readline()
1698 if not line:
1699 break
1700 id, svn_revnum, ignored = line.split(" ", 2)
1701 id = int(id, 16)
1702 if id != old_id:
1703 logger.verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
1704 old_id = id
1705 offsets[id] = fpos
1707 f.close()
1709 offsets_db = file(
1710 artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
1711 cPickle.dump(offsets, offsets_db, -1)
1712 offsets_db.close()
1714 def run(self, run_options, stats_keeper):
1715 logger.quiet("Determining offsets for all symbolic names...")
1716 Ctx()._projects = read_projects(
1717 artifact_manager.get_temp_file(config.PROJECTS)
1719 Ctx()._symbol_db = SymbolDatabase()
1720 self.generate_offsets_for_symbolings()
1721 Ctx()._symbol_db.close()
1722 logger.quiet("Done.")
1725 class OutputPass(Pass):
1726 """This pass was formerly known as pass8."""
1728 def register_artifacts(self):
1729 self._register_temp_file_needed(config.PROJECTS)
1730 self._register_temp_file_needed(config.CVS_PATHS_DB)
1731 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1732 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1733 self._register_temp_file_needed(config.SYMBOL_DB)
1734 self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
1735 self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
1736 self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
1737 self._register_temp_file_needed(config.SVN_COMMITS_STORE)
1738 self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
1739 Ctx().output_option.register_artifacts(self)
1741 def run(self, run_options, stats_keeper):
1742 Ctx()._projects = read_projects(
1743 artifact_manager.get_temp_file(config.PROJECTS)
1745 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1746 Ctx()._metadata_db = MetadataDatabase(
1747 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
1748 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
1749 DB_OPEN_READ,
1751 Ctx()._cvs_items_db = IndexedCVSItemStore(
1752 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1753 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1754 DB_OPEN_READ)
1755 Ctx()._symbol_db = SymbolDatabase()
1756 Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)
1758 Ctx().output_option.setup(stats_keeper.svn_rev_count())
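# Emit the SVNCommits in revision order; get_svn_commit() is expected to
# return None once svn_revnum passes the last stored commit, which ends
# the loop below.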
1760 svn_revnum = 1
1761 svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
1762 while svn_commit:
1763 svn_commit.output(Ctx().output_option)
1764 svn_revnum += 1
1765 svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
1767 Ctx().output_option.cleanup()
1768 Ctx()._persistence_manager.close()
1770 Ctx()._symbol_db.close()
1771 Ctx()._cvs_items_db.close()
1772 Ctx()._metadata_db.close()
1773 Ctx()._cvs_path_db.close()
1776 # The list of passes constituting a run of cvs2svn:
1777 passes = [
1778 CollectRevsPass(),
1779 CleanMetadataPass(),
1780 CollateSymbolsPass(),
1781 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1782 FilterSymbolsPass(),
1783 SortRevisionsPass(),
1784 SortSymbolsPass(),
1785 InitializeChangesetsPass(),
1786 #CheckIndexedItemStoreDependenciesPass(
1787 # config.CVS_ITEMS_SORTED_STORE,
1788 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1789 BreakRevisionChangesetCyclesPass(),
1790 RevisionTopologicalSortPass(),
1791 BreakSymbolChangesetCyclesPass(),
1792 BreakAllChangesetCyclesPass(),
1793 TopologicalSortPass(),
1794 CreateRevsPass(),
1795 SortSymbolOpeningsClosingsPass(),
1796 IndexSymbolsPass(),
1797 OutputPass(),