1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
20 import sys
21 import shutil
22 import cPickle
24 from cvs2svn_lib import config
25 from cvs2svn_lib.context import Ctx
26 from cvs2svn_lib.common import warning_prefix
27 from cvs2svn_lib.common import FatalException
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import DB_OPEN_NEW
31 from cvs2svn_lib.common import DB_OPEN_READ
32 from cvs2svn_lib.common import DB_OPEN_WRITE
33 from cvs2svn_lib.common import Timestamper
34 from cvs2svn_lib.sort import sort_file
35 from cvs2svn_lib.log import logger
36 from cvs2svn_lib.pass_manager import Pass
37 from cvs2svn_lib.serializer import PrimedPickleSerializer
38 from cvs2svn_lib.artifact_manager import artifact_manager
39 from cvs2svn_lib.cvs_path_database import CVSPathDatabase
40 from cvs2svn_lib.metadata_database import MetadataDatabase
41 from cvs2svn_lib.project import read_projects
42 from cvs2svn_lib.project import write_projects
43 from cvs2svn_lib.symbol import LineOfDevelopment
44 from cvs2svn_lib.symbol import Trunk
45 from cvs2svn_lib.symbol import Symbol
46 from cvs2svn_lib.symbol import Branch
47 from cvs2svn_lib.symbol import Tag
48 from cvs2svn_lib.symbol import ExcludedSymbol
49 from cvs2svn_lib.symbol_database import SymbolDatabase
50 from cvs2svn_lib.symbol_database import create_symbol_database
51 from cvs2svn_lib.symbol_statistics import SymbolPlanError
52 from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
53 from cvs2svn_lib.symbol_statistics import SymbolStatistics
54 from cvs2svn_lib.cvs_item import CVSRevision
55 from cvs2svn_lib.cvs_item import CVSSymbol
56 from cvs2svn_lib.cvs_item_database import OldCVSItemStore
57 from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
58 from cvs2svn_lib.cvs_item_database import cvs_item_primer
59 from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib.key_generator import KeyGenerator
64 from cvs2svn_lib.changeset import RevisionChangeset
65 from cvs2svn_lib.changeset import OrderedChangeset
66 from cvs2svn_lib.changeset import SymbolChangeset
67 from cvs2svn_lib.changeset import BranchChangeset
68 from cvs2svn_lib.changeset import create_symbol_changeset
69 from cvs2svn_lib.changeset_graph import ChangesetGraph
70 from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
71 from cvs2svn_lib.changeset_database import ChangesetDatabase
72 from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
73 from cvs2svn_lib.svn_commit import SVNRevisionCommit
74 from cvs2svn_lib.openings_closings import SymbolingsLogger
75 from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
76 from cvs2svn_lib.persistence_manager import PersistenceManager
77 from cvs2svn_lib.repository_walker import walk_repository
78 from cvs2svn_lib.collect_data import CollectData
79 from cvs2svn_lib.check_dependencies_pass \
80 import CheckItemStoreDependenciesPass
81 from cvs2svn_lib.check_dependencies_pass \
82 import CheckIndexedItemStoreDependenciesPass
85 class CollectRevsPass(Pass):
86 """This pass was formerly known as pass1."""
88 def register_artifacts(self):
89 self._register_temp_file(config.PROJECTS)
90 self._register_temp_file(config.SYMBOL_STATISTICS)
91 self._register_temp_file(config.METADATA_INDEX_TABLE)
92 self._register_temp_file(config.METADATA_STORE)
93 self._register_temp_file(config.CVS_PATHS_DB)
94 self._register_temp_file(config.CVS_ITEMS_STORE)
96 def run(self, run_options, stats_keeper):
97 logger.quiet("Examining all CVS ',v' files...")
98 Ctx()._projects = {}
99 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_NEW)
100 cd = CollectData(stats_keeper)
102 # Key generator for CVSFiles:
103 file_key_generator = KeyGenerator()
105 for project in run_options.projects:
106 Ctx()._projects[project.id] = project
107 cd.process_project(
108 project,
109 walk_repository(project, file_key_generator, cd.record_fatal_error),
111 run_options.projects = None
113 fatal_errors = cd.close()
115 if fatal_errors:
116 raise FatalException("Pass 1 complete.\n"
117 + "=" * 75 + "\n"
118 + "Error summary:\n"
119 + "\n".join(fatal_errors) + "\n"
120 + "Exited due to fatal error(s).")
122 Ctx()._cvs_path_db.close()
123 write_projects(artifact_manager.get_temp_file(config.PROJECTS))
124 logger.quiet("Done")
127 class CleanMetadataPass(Pass):
128 """Clean up CVS revision metadata and write it to a new database."""
130 def register_artifacts(self):
131 self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
132 self._register_temp_file(config.METADATA_CLEAN_STORE)
133 self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
134 self._register_temp_file_needed(config.METADATA_STORE)
136 def _get_clean_author(self, author):
137 """Return AUTHOR, converted appropriately to UTF8.
139 Raise a UnicodeError if it cannot be converted using the
140 configured cvs_author_decoder."""
142 try:
143 return self._authors[author]
144 except KeyError:
145 pass
147 try:
148 clean_author = Ctx().cvs_author_decoder(author)
149 except UnicodeError:
150 self._authors[author] = author
151 raise UnicodeError('Problem decoding author \'%s\'' % (author,))
153 try:
154 clean_author = clean_author.encode('utf8')
155 except UnicodeError:
156 self._authors[author] = author
157 raise UnicodeError('Problem encoding author \'%s\'' % (author,))
159 self._authors[author] = clean_author
160 return clean_author
162 def _get_clean_log_msg(self, log_msg):
163 """Return LOG_MSG, converted appropriately to UTF8.
165 Raise a UnicodeError if it cannot be converted using the
166 configured cvs_log_decoder."""
168 try:
169 clean_log_msg = Ctx().cvs_log_decoder(log_msg)
170 except UnicodeError:
171 raise UnicodeError(
172 'Problem decoding log message:\n'
173 '%s\n'
174 '%s\n'
175 '%s'
176 % ('-' * 75, log_msg, '-' * 75,)
179 try:
180 return clean_log_msg.encode('utf8')
181 except UnicodeError:
182 raise UnicodeError(
183 'Problem encoding log message:\n'
184 '%s\n'
185 '%s\n'
186 '%s'
187 % ('-' * 75, log_msg, '-' * 75,)
190 def _clean_metadata(self, metadata):
191 """Clean up METADATA by overwriting its members as necessary."""
193 try:
194 metadata.author = self._get_clean_author(metadata.author)
195 except UnicodeError, e:
196 logger.warn('%s: %s' % (warning_prefix, e,))
197 self.warnings = True
199 try:
200 metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
201 except UnicodeError, e:
202 logger.warn('%s: %s' % (warning_prefix, e,))
203 self.warnings = True
205 def run(self, run_options, stats_keeper):
206 logger.quiet("Converting metadata to UTF8...")
207 metadata_db = MetadataDatabase(
208 artifact_manager.get_temp_file(config.METADATA_STORE),
209 artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
210 DB_OPEN_READ,
212 metadata_clean_db = MetadataDatabase(
213 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
214 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
215 DB_OPEN_NEW,
218 self.warnings = False
220 # A map {author : clean_author} for those known (to avoid
221 # repeating warnings):
222 self._authors = {}
224 for id in metadata_db.iterkeys():
225 metadata = metadata_db[id]
227 # Record the original author name because it might be needed for
228 # expanding CVS keywords:
229 metadata.original_author = metadata.author
231 self._clean_metadata(metadata)
233 metadata_clean_db[id] = metadata
235 if self.warnings:
236 raise FatalError(
237 'There were warnings converting author names and/or log messages\n'
238 'to Unicode (see messages above). Please restart this pass\n'
239 'with one or more \'--encoding\' parameters or with\n'
240 '\'--fallback-encoding\'.'
243 metadata_clean_db.close()
244 metadata_db.close()
245 logger.quiet("Done")
248 class CollateSymbolsPass(Pass):
249 """Divide symbols into branches, tags, and excludes."""
251 conversion_names = {
252 Trunk : 'trunk',
253 Branch : 'branch',
254 Tag : 'tag',
255 ExcludedSymbol : 'exclude',
256 Symbol : '.',
259 def register_artifacts(self):
260 self._register_temp_file(config.SYMBOL_DB)
261 self._register_temp_file_needed(config.PROJECTS)
262 self._register_temp_file_needed(config.SYMBOL_STATISTICS)
264 def get_symbol(self, run_options, stats):
265 """Use StrategyRules to decide what to do with a symbol.
267 STATS is an instance of symbol_statistics._Stats describing an
268 instance of Symbol or Trunk. To determine how the symbol is to be
269 converted, consult the StrategyRules in the project's
270 symbol_strategy_rules. Each rule is allowed a chance to change
271 the way the symbol will be converted. If the symbol is not a
272 Trunk or TypedSymbol after all rules have run, raise
273 IndeterminateSymbolException."""
275 symbol = stats.lod
276 rules = run_options.project_symbol_strategy_rules[symbol.project.id]
277 for rule in rules:
278 symbol = rule.get_symbol(symbol, stats)
279 assert symbol is not None
281 stats.check_valid(symbol)
283 return symbol
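# A sketch of how the rule chain typically behaves (the concrete rule
# classes are defined elsewhere, and the ordering described here is only a
# common configuration, not a guarantee): hint-driven rules such as those
# behind --force-tag, --force-branch and --exclude usually run before
# heuristic rules.  Each rule sees the (possibly already typed) symbol and
# returns it either unchanged or replaced by a TypedSymbol (Branch, Tag or
# ExcludedSymbol); if the symbol is still untyped after the whole chain,
# stats.check_valid() raises IndeterminateSymbolException, which
# get_symbols() below reports to the user as an ambiguity.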
285 def log_symbol_summary(self, stats, symbol):
286 if not self.symbol_info_file:
287 return
289 if isinstance(symbol, Trunk):
290 name = '.trunk.'
291 preferred_parent_name = '.'
292 else:
293 name = stats.lod.name
294 if symbol.preferred_parent_id is None:
295 preferred_parent_name = '.'
296 else:
297 preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
298 if isinstance(preferred_parent, Trunk):
299 preferred_parent_name = '.trunk.'
300 else:
301 preferred_parent_name = preferred_parent.name
303 if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
304 symbol_path = symbol.base_path
305 else:
306 symbol_path = '.'
308 self.symbol_info_file.write(
309 '%-5d %-30s %-10s %s %s\n' % (
310 stats.lod.project.id,
311 name,
312 self.conversion_names[symbol.__class__],
313 symbol_path,
314 preferred_parent_name,
317 self.symbol_info_file.write(' # %s\n' % (stats,))
318 parent_counts = stats.possible_parents.items()
319 if parent_counts:
320 self.symbol_info_file.write(' # Possible parents:\n')
321 parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
322 for (pp, count) in parent_counts:
323 if isinstance(pp, Trunk):
324 self.symbol_info_file.write(
325 ' # .trunk. : %d\n' % (count,)
327 else:
328 self.symbol_info_file.write(
329 ' # %s : %d\n' % (pp.name, count,)
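# When Ctx().symbol_info_filename is set, the writes above produce a small
# plain-text report.  A hypothetical excerpt (all values illustrative only):
#
#   # Columns: project_id symbol_name conversion symbol_path preferred_parent_name
#   0     RELEASE_1_0                    tag        tags/RELEASE_1_0 .trunk.
#       # <stats summary for RELEASE_1_0>
#       # Possible parents:
#       #   .trunk. : 12
#       #   dev-branch : 1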
332 def get_symbols(self, run_options):
333 """Return a map telling how to convert symbols.
335 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
336 indicating how each symbol should be converted. Trunk objects in
337 SYMBOL_STATS are passed through unchanged. One object is included
338 in the return value for each line of development described in
339 SYMBOL_STATS.
341 Raise FatalError if there was an error."""
343 errors = []
344 mismatches = []
346 if Ctx().symbol_info_filename is not None:
347 self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
348 self.symbol_info_file.write(
349 '# Columns: project_id symbol_name conversion symbol_path '
350 'preferred_parent_name\n'
352 else:
353 self.symbol_info_file = None
355 # Initialize each symbol strategy rule a single time, even if it
356 # is used in more than one project. First define a map from
357 # object id to symbol strategy rule:
358 rules = {}
359 for rule_list in run_options.project_symbol_strategy_rules:
360 for rule in rule_list:
361 rules[id(rule)] = rule
363 for rule in rules.itervalues():
364 rule.start(self.symbol_stats)
366 retval = {}
368 for stats in self.symbol_stats:
369 try:
370 symbol = self.get_symbol(run_options, stats)
371 except IndeterminateSymbolException, e:
372 self.log_symbol_summary(stats, stats.lod)
373 mismatches.append(e.stats)
374 except SymbolPlanError, e:
375 self.log_symbol_summary(stats, stats.lod)
376 errors.append(e)
377 else:
378 self.log_symbol_summary(stats, symbol)
379 retval[stats.lod] = symbol
381 for rule in rules.itervalues():
382 rule.finish()
384 if self.symbol_info_file:
385 self.symbol_info_file.close()
387 del self.symbol_info_file
389 if errors or mismatches:
390 s = ['Problems determining how symbols should be converted:\n']
391 for e in errors:
392 s.append('%s\n' % (e,))
393 if mismatches:
394 s.append(
395 'It is not clear how the following symbols '
396 'should be converted.\n'
397 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
398 'and/or\n'
399 '--symbol-default to resolve the ambiguity.\n'
401 for stats in mismatches:
402 s.append(' %s\n' % (stats,))
403 raise FatalError(''.join(s))
404 else:
405 return retval
407 def run(self, run_options, stats_keeper):
408 Ctx()._projects = read_projects(
409 artifact_manager.get_temp_file(config.PROJECTS)
411 self.symbol_stats = SymbolStatistics(
412 artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
415 symbol_map = self.get_symbols(run_options)
417 # Check the symbols for consistency and bail out if there were errors:
418 self.symbol_stats.check_consistency(symbol_map)
420 # Check that the symbols all have SVN paths set and that the paths
421 # are disjoint:
422 Ctx().output_option.check_symbols(symbol_map)
424 for symbol in symbol_map.itervalues():
425 if isinstance(symbol, ExcludedSymbol):
426 self.symbol_stats.exclude_symbol(symbol)
428 create_symbol_database(symbol_map.values())
430 del self.symbol_stats
432 logger.quiet("Done")
435 class FilterSymbolsPass(Pass):
436 """Delete any branches/tags that are to be excluded.
438 Also delete revisions on excluded branches, and delete other
439 references to the excluded symbols."""
441 def register_artifacts(self):
442 self._register_temp_file(config.ITEM_SERIALIZER)
443 self._register_temp_file(config.CVS_REVS_DATAFILE)
444 self._register_temp_file(config.CVS_SYMBOLS_DATAFILE)
445 self._register_temp_file_needed(config.PROJECTS)
446 self._register_temp_file_needed(config.SYMBOL_DB)
447 self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
448 self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
449 self._register_temp_file_needed(config.CVS_PATHS_DB)
450 self._register_temp_file_needed(config.CVS_ITEMS_STORE)
451 Ctx().revision_collector.register_artifacts(self)
453 def run(self, run_options, stats_keeper):
454 Ctx()._projects = read_projects(
455 artifact_manager.get_temp_file(config.PROJECTS)
457 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
458 Ctx()._metadata_db = MetadataDatabase(
459 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
460 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
461 DB_OPEN_READ,
463 Ctx()._symbol_db = SymbolDatabase()
464 cvs_item_store = OldCVSItemStore(
465 artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
467 cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
468 f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'wb')
469 cPickle.dump(cvs_item_serializer, f, -1)
470 f.close()
472 rev_db = NewSortableCVSRevisionDatabase(
473 artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
474 cvs_item_serializer,
477 symbol_db = NewSortableCVSSymbolDatabase(
478 artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
479 cvs_item_serializer,
482 revision_collector = Ctx().revision_collector
484 logger.quiet("Filtering out excluded symbols and summarizing items...")
486 stats_keeper.reset_cvs_rev_info()
487 revision_collector.start()
489 # Process the cvs items store one file at a time:
490 for cvs_file_items in cvs_item_store.iter_cvs_file_items():
491 logger.verbose(cvs_file_items.cvs_file.rcs_path)
492 cvs_file_items.filter_excluded_symbols()
493 cvs_file_items.mutate_symbols()
494 cvs_file_items.adjust_parents()
495 cvs_file_items.refine_symbols()
496 cvs_file_items.determine_revision_properties(
497 Ctx().revision_property_setters
499 cvs_file_items.record_opened_symbols()
500 cvs_file_items.record_closed_symbols()
501 cvs_file_items.check_link_consistency()
503 # Give the revision collector a chance to collect data about the
504 # file:
505 revision_collector.process_file(cvs_file_items)
507 # Store whatever is left to the new file and update statistics:
508 stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
509 for cvs_item in cvs_file_items.values():
510 stats_keeper.record_cvs_item(cvs_item)
512 if isinstance(cvs_item, CVSRevision):
513 rev_db.add(cvs_item)
514 elif isinstance(cvs_item, CVSSymbol):
515 symbol_db.add(cvs_item)
517 stats_keeper.set_stats_reflect_exclude(True)
519 rev_db.close()
520 symbol_db.close()
521 revision_collector.finish()
522 cvs_item_store.close()
523 Ctx()._symbol_db.close()
524 Ctx()._cvs_path_db.close()
526 logger.quiet("Done")
529 class SortRevisionsPass(Pass):
530 """Sort the revisions file."""
532 def register_artifacts(self):
533 self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
534 self._register_temp_file_needed(config.CVS_REVS_DATAFILE)
536 def run(self, run_options, stats_keeper):
537 logger.quiet("Sorting CVS revision summaries...")
538 sort_file(
539 artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
540 artifact_manager.get_temp_file(
541 config.CVS_REVS_SORTED_DATAFILE
543 tempdirs=[Ctx().tmpdir],
545 logger.quiet("Done")
548 class SortSymbolsPass(Pass):
549 """Sort the symbols file."""
551 def register_artifacts(self):
552 self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
553 self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)
555 def run(self, run_options, stats_keeper):
556 logger.quiet("Sorting CVS symbol summaries...")
557 sort_file(
558 artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
559 artifact_manager.get_temp_file(
560 config.CVS_SYMBOLS_SORTED_DATAFILE
562 tempdirs=[Ctx().tmpdir],
564 logger.quiet("Done")
567 class InitializeChangesetsPass(Pass):
568 """Create preliminary CommitSets."""
570 def register_artifacts(self):
571 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
572 self._register_temp_file(config.CHANGESETS_STORE)
573 self._register_temp_file(config.CHANGESETS_INDEX)
574 self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
575 self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
576 self._register_temp_file_needed(config.PROJECTS)
577 self._register_temp_file_needed(config.SYMBOL_DB)
578 self._register_temp_file_needed(config.CVS_PATHS_DB)
579 self._register_temp_file_needed(config.ITEM_SERIALIZER)
580 self._register_temp_file_needed(config.CVS_REVS_SORTED_DATAFILE)
581 self._register_temp_file_needed(
582 config.CVS_SYMBOLS_SORTED_DATAFILE)
584 def get_revision_changesets(self):
585 """Generate revision changesets, one at a time.
587 Each time, yield a list of CVSRevisions that might potentially
588 constitute a changeset."""
590 # Create changesets for CVSRevisions:
591 old_metadata_id = None
592 old_timestamp = None
593 changeset_items = []
595 db = OldSortableCVSRevisionDatabase(
596 artifact_manager.get_temp_file(
597 config.CVS_REVS_SORTED_DATAFILE
599 self.cvs_item_serializer,
602 for cvs_rev in db:
603 if cvs_rev.metadata_id != old_metadata_id \
604 or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
605 # Start a new changeset. First finish up the old changeset,
606 # if any:
607 if changeset_items:
608 yield changeset_items
609 changeset_items = []
610 old_metadata_id = cvs_rev.metadata_id
611 changeset_items.append(cvs_rev)
612 old_timestamp = cvs_rev.timestamp
614 # Finish up the last changeset, if any:
615 if changeset_items:
616 yield changeset_items
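# A worked example of the grouping rule above (revision ids, metadata ids
# and timestamps are hypothetical).  A new changeset starts whenever the
# metadata id (author + log message) changes, or the time since the previous
# revision exceeds config.COMMIT_THRESHOLD:
#
#   rev  metadata_id  timestamp        grouping
#   r1   M1           t                changeset A
#   r2   M1           t + 10s          changeset A  (same metadata, small gap)
#   r3   M1           t + 10s + huge   changeset B  (gap > COMMIT_THRESHOLD)
#   r4   M2           ...              changeset C  (metadata id changed)
#
# get_symbol_changesets() below applies the same pattern, keyed only on the
# symbol id.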
618 def get_symbol_changesets(self):
619 """Generate symbol changesets, one at a time.
621 Each time, yield a list of CVSSymbols that might potentially
622 constitute a changeset."""
624 old_symbol_id = None
625 changeset_items = []
627 db = OldSortableCVSSymbolDatabase(
628 artifact_manager.get_temp_file(
629 config.CVS_SYMBOLS_SORTED_DATAFILE
631 self.cvs_item_serializer,
634 for cvs_symbol in db:
635 if cvs_symbol.symbol.id != old_symbol_id:
636 # Start a new changeset. First finish up the old changeset,
637 # if any:
638 if changeset_items:
639 yield changeset_items
640 changeset_items = []
641 old_symbol_id = cvs_symbol.symbol.id
642 changeset_items.append(cvs_symbol)
644 # Finish up the last changeset, if any:
645 if changeset_items:
646 yield changeset_items
648 @staticmethod
649 def compare_items(a, b):
650 return (
651 cmp(a.timestamp, b.timestamp)
652 or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
653 or cmp([int(x) for x in a.rev.split('.')],
654 [int(x) for x in b.rev.split('.')])
655 or cmp(a.id, b.id))
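# Note that revision numbers are compared numerically, component by
# component, rather than as strings: with equal timestamps and paths,
# revision '1.9' sorts before '1.10', whereas a plain string comparison
# would order '1.10' first.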
657 def break_internal_dependencies(self, changeset_items):
658 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
660 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
661 belong in a single RevisionChangeset, but there might be internal
662 dependencies among the items. Return a list of lists of
663 CVSRevisions; if the list had to be split, the split eliminates at
664 least one internal dependency. Iff CHANGESET_ITEMS does not have
665 to be split, then the return value will contain a single value,
666 namely the original value of CHANGESET_ITEMS. Split
667 CHANGESET_ITEMS at most once, even though the resulting changesets
668 might themselves have internal dependencies."""
670 # We only look for succ dependencies, since by doing so we
671 # automatically cover pred dependencies as well. First create a
672 # list of tuples (pred, succ) of id pairs for CVSItems that depend
673 # on each other.
674 dependencies = []
675 changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
676 for cvs_item in changeset_items:
677 for next_id in cvs_item.get_succ_ids():
678 if next_id in changeset_cvs_item_ids:
679 # Sanity check: a CVSItem should never depend on itself:
680 if next_id == cvs_item.id:
681 raise InternalError('Item depends on itself: %s' % (cvs_item,))
683 dependencies.append((cvs_item.id, next_id,))
685 if dependencies:
686 # Sort the changeset_items in a defined order (chronological to the
687 # extent that the timestamps are correct and unique).
688 changeset_items.sort(self.compare_items)
689 indexes = {}
690 for (i, changeset_item) in enumerate(changeset_items):
691 indexes[changeset_item.id] = i
693 # How many internal dependencies would be broken by breaking the
694 # Changeset after a particular index?
695 breaks = [0] * len(changeset_items)
696 for (pred, succ,) in dependencies:
697 pred_index = indexes[pred]
698 succ_index = indexes[succ]
699 breaks[min(pred_index, succ_index)] += 1
700 breaks[max(pred_index, succ_index)] -= 1
701 for i in range(1, len(breaks)):
702 breaks[i] += breaks[i - 1]
704 best_i = None
705 best_count = -1
706 best_gap = 0
707 for i in range(0, len(breaks) - 1):
708 gap = changeset_items[i + 1].timestamp - changeset_items[i].timestamp
709 if (
710 breaks[i] > best_count
711 or breaks[i] == best_count and gap > best_gap
713 best_i = i
714 best_count = breaks[i]
715 best_gap = gap
717 return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
718 else:
719 return [changeset_items]
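# Worked example of the split-point selection above (items, positions and
# dependencies are hypothetical).  Suppose the sorted changeset_items are
# [A, B, C, D] with internal dependencies A->C, B->C and A->D:
#
#   after the +1/-1 marking:  breaks = [2, 1, -2, -1]
#   after the prefix sum:     breaks = [2, 3,  1,  0]
#
# breaks[i] is the number of internal dependencies severed by cutting the
# changeset between items i and i+1, so the best cut is after index 1
# (3 dependencies broken), giving [A, B] and [C, D].  When two cut points
# would break the same number of dependencies, the one with the larger
# timestamp gap between neighbouring items wins -- i.e. we prefer the
# biggest gap.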
721 def break_all_internal_dependencies(self, changeset_items):
722 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
724 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
725 be part of a single changeset. Break this list into sublists,
726 where the CVSRevisions in each sublist are free of mutual
727 dependencies."""
729 # This method is written non-recursively to avoid any possible
730 # problems with recursion depth.
732 changesets_to_split = [changeset_items]
733 while changesets_to_split:
734 changesets = self.break_internal_dependencies(changesets_to_split.pop())
735 if len(changesets) == 1:
736 [changeset_items] = changesets
737 yield changeset_items
738 else:
739 # The changeset had to be split; see if either of the
740 # fragments have to be split:
741 changesets.reverse()
742 changesets_to_split.extend(changesets)
744 def get_changesets(self):
745 """Generate (Changeset, [CVSItem,...]) for all changesets.
747 The Changesets already have their internal dependencies broken.
748 The [CVSItem,...] list is the list of CVSItems in the
749 corresponding Changeset."""
751 for changeset_items in self.get_revision_changesets():
752 for split_changeset_items \
753 in self.break_all_internal_dependencies(changeset_items):
754 yield (
755 RevisionChangeset(
756 self.changeset_key_generator.gen_id(),
757 [cvs_rev.id for cvs_rev in split_changeset_items]
759 split_changeset_items,
762 for changeset_items in self.get_symbol_changesets():
763 yield (
764 create_symbol_changeset(
765 self.changeset_key_generator.gen_id(),
766 changeset_items[0].symbol,
767 [cvs_symbol.id for cvs_symbol in changeset_items]
769 changeset_items,
772 def run(self, run_options, stats_keeper):
773 logger.quiet("Creating preliminary commit sets...")
775 Ctx()._projects = read_projects(
776 artifact_manager.get_temp_file(config.PROJECTS)
778 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
779 Ctx()._symbol_db = SymbolDatabase()
781 f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'rb')
782 self.cvs_item_serializer = cPickle.load(f)
783 f.close()
785 changeset_db = ChangesetDatabase(
786 artifact_manager.get_temp_file(config.CHANGESETS_STORE),
787 artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
788 DB_OPEN_NEW,
790 cvs_item_to_changeset_id = CVSItemToChangesetTable(
791 artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
792 DB_OPEN_NEW,
795 self.sorted_cvs_items_db = IndexedCVSItemStore(
796 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
797 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
798 DB_OPEN_NEW)
800 self.changeset_key_generator = KeyGenerator()
802 for (changeset, changeset_items) in self.get_changesets():
803 if logger.is_on(logger.DEBUG):
804 logger.debug(repr(changeset))
805 changeset_db.store(changeset)
806 for cvs_item in changeset_items:
807 self.sorted_cvs_items_db.add(cvs_item)
808 cvs_item_to_changeset_id[cvs_item.id] = changeset.id
810 self.sorted_cvs_items_db.close()
811 cvs_item_to_changeset_id.close()
812 changeset_db.close()
813 Ctx()._symbol_db.close()
814 Ctx()._cvs_path_db.close()
816 del self.cvs_item_serializer
818 logger.quiet("Done")
821 class ProcessedChangesetLogger:
822 def __init__(self):
823 self.processed_changeset_ids = []
825 def log(self, changeset_id):
826 if logger.is_on(logger.DEBUG):
827 self.processed_changeset_ids.append(changeset_id)
829 def flush(self):
830 if self.processed_changeset_ids:
831 logger.debug(
832 'Consumed changeset ids %s'
833 % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))
835 del self.processed_changeset_ids[:]
838 class BreakRevisionChangesetCyclesPass(Pass):
839 """Break up any dependency cycles involving only RevisionChangesets."""
841 def register_artifacts(self):
842 self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
843 self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
844 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
845 self._register_temp_file_needed(config.PROJECTS)
846 self._register_temp_file_needed(config.SYMBOL_DB)
847 self._register_temp_file_needed(config.CVS_PATHS_DB)
848 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
849 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
850 self._register_temp_file_needed(config.CHANGESETS_STORE)
851 self._register_temp_file_needed(config.CHANGESETS_INDEX)
852 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)
854 def get_source_changesets(self):
855 old_changeset_db = ChangesetDatabase(
856 artifact_manager.get_temp_file(config.CHANGESETS_STORE),
857 artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
858 DB_OPEN_READ)
860 changeset_ids = old_changeset_db.keys()
862 for changeset_id in changeset_ids:
863 yield old_changeset_db[changeset_id]
865 old_changeset_db.close()
866 del old_changeset_db
868 def break_cycle(self, cycle):
869 """Break up one or more changesets in CYCLE to help break the cycle.
871 CYCLE is a list of Changesets where
873 cycle[i] depends on cycle[i - 1]
875 Break up one or more changesets in CYCLE to make progress towards
876 breaking the cycle. Update self.changeset_graph accordingly.
878 It is not guaranteed that the cycle will be broken by one call to
879 this routine, but at least some progress must be made."""
881 self.processed_changeset_logger.flush()
882 best_i = None
883 best_link = None
884 for i in range(len(cycle)):
885 # It's OK if this index wraps to -1:
886 link = ChangesetGraphLink(
887 cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])
889 if best_i is None or link < best_link:
890 best_i = i
891 best_link = link
893 if logger.is_on(logger.DEBUG):
894 logger.debug(
895 'Breaking cycle %s by breaking node %x' % (
896 ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
897 best_link.changeset.id,))
899 new_changesets = best_link.break_changeset(self.changeset_key_generator)
901 self.changeset_graph.delete_changeset(best_link.changeset)
903 for changeset in new_changesets:
904 self.changeset_graph.add_new_changeset(changeset)
906 def run(self, run_options, stats_keeper):
907 logger.quiet("Breaking revision changeset dependency cycles...")
909 Ctx()._projects = read_projects(
910 artifact_manager.get_temp_file(config.PROJECTS)
912 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
913 Ctx()._symbol_db = SymbolDatabase()
914 Ctx()._cvs_items_db = IndexedCVSItemStore(
915 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
916 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
917 DB_OPEN_READ)
919 shutil.copyfile(
920 artifact_manager.get_temp_file(
921 config.CVS_ITEM_TO_CHANGESET),
922 artifact_manager.get_temp_file(
923 config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
924 cvs_item_to_changeset_id = CVSItemToChangesetTable(
925 artifact_manager.get_temp_file(
926 config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
927 DB_OPEN_WRITE)
929 changeset_db = ChangesetDatabase(
930 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
931 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
932 DB_OPEN_NEW)
934 self.changeset_graph = ChangesetGraph(
935 changeset_db, cvs_item_to_changeset_id
938 max_changeset_id = 0
939 for changeset in self.get_source_changesets():
940 changeset_db.store(changeset)
941 if isinstance(changeset, RevisionChangeset):
942 self.changeset_graph.add_changeset(changeset)
943 max_changeset_id = max(max_changeset_id, changeset.id)
945 self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
947 self.processed_changeset_logger = ProcessedChangesetLogger()
949 # Consume the graph, breaking cycles using self.break_cycle():
950 for (changeset, time_range) in self.changeset_graph.consume_graph(
951 cycle_breaker=self.break_cycle
953 self.processed_changeset_logger.log(changeset.id)
955 self.processed_changeset_logger.flush()
956 del self.processed_changeset_logger
958 self.changeset_graph.close()
959 self.changeset_graph = None
960 Ctx()._cvs_items_db.close()
961 Ctx()._symbol_db.close()
962 Ctx()._cvs_path_db.close()
964 logger.quiet("Done")
967 class RevisionTopologicalSortPass(Pass):
968 """Sort RevisionChangesets into commit order.
970 Also convert them to OrderedChangesets, without changing their ids."""
972 def register_artifacts(self):
973 self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
974 self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
975 self._register_temp_file_needed(config.PROJECTS)
976 self._register_temp_file_needed(config.SYMBOL_DB)
977 self._register_temp_file_needed(config.CVS_PATHS_DB)
978 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
979 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
980 self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
981 self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
982 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
984 def get_source_changesets(self, changeset_db):
985 changeset_ids = changeset_db.keys()
987 for changeset_id in changeset_ids:
988 yield changeset_db[changeset_id]
990 def get_changesets(self):
991 changeset_db = ChangesetDatabase(
992 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
993 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
994 DB_OPEN_READ,
997 changeset_graph = ChangesetGraph(
998 changeset_db,
999 CVSItemToChangesetTable(
1000 artifact_manager.get_temp_file(
1001 config.CVS_ITEM_TO_CHANGESET_REVBROKEN
1003 DB_OPEN_READ,
1007 for changeset in self.get_source_changesets(changeset_db):
1008 if isinstance(changeset, RevisionChangeset):
1009 changeset_graph.add_changeset(changeset)
1010 else:
1011 yield changeset
1013 changeset_ids = []
1015 # Sentry:
1016 changeset_ids.append(None)
1018 for (changeset, time_range) in changeset_graph.consume_graph():
1019 changeset_ids.append(changeset.id)
1021 # Sentry:
1022 changeset_ids.append(None)
1024 for i in range(1, len(changeset_ids) - 1):
1025 changeset = changeset_db[changeset_ids[i]]
1026 yield OrderedChangeset(
1027 changeset.id, changeset.cvs_item_ids, i - 1,
1028 changeset_ids[i - 1], changeset_ids[i + 1])
1030 changeset_graph.close()
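# A small example of the sentinel trick above (changeset ids are
# hypothetical).  If the topological sort yields RevisionChangesets 7, 3, 9,
# then
#
#   changeset_ids == [None, 7, 3, 9, None]
#
# and the loop emits OrderedChangesets with (ordinal, prev_id, next_id) of
# (0, None, 3) for changeset 7, (1, 7, 9) for changeset 3, and (2, 3, None)
# for changeset 9.  The None sentinels avoid special-casing the first and
# last changesets.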
1032 def run(self, run_options, stats_keeper):
1033 logger.quiet("Generating CVSRevisions in commit order...")
1035 Ctx()._projects = read_projects(
1036 artifact_manager.get_temp_file(config.PROJECTS)
1038 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1039 Ctx()._symbol_db = SymbolDatabase()
1040 Ctx()._cvs_items_db = IndexedCVSItemStore(
1041 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1042 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1043 DB_OPEN_READ)
1045 changesets_revordered_db = ChangesetDatabase(
1046 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
1047 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
1048 DB_OPEN_NEW)
1050 for changeset in self.get_changesets():
1051 changesets_revordered_db.store(changeset)
1053 changesets_revordered_db.close()
1054 Ctx()._cvs_items_db.close()
1055 Ctx()._symbol_db.close()
1056 Ctx()._cvs_path_db.close()
1058 logger.quiet("Done")
1061 class BreakSymbolChangesetCyclesPass(Pass):
1062 """Break up any dependency cycles involving only SymbolChangesets."""
1064 def register_artifacts(self):
1065 self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
1066 self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
1067 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
1068 self._register_temp_file_needed(config.PROJECTS)
1069 self._register_temp_file_needed(config.SYMBOL_DB)
1070 self._register_temp_file_needed(config.CVS_PATHS_DB)
1071 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1072 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1073 self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
1074 self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
1075 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
1077 def get_source_changesets(self):
1078 old_changeset_db = ChangesetDatabase(
1079 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
1080 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
1081 DB_OPEN_READ)
1083 changeset_ids = old_changeset_db.keys()
1085 for changeset_id in changeset_ids:
1086 yield old_changeset_db[changeset_id]
1088 old_changeset_db.close()
1090 def break_cycle(self, cycle):
1091 """Break up one or more changesets in CYCLE to help break the cycle.
1093 CYCLE is a list of Changesets where
1095 cycle[i] depends on cycle[i - 1]
1097 Break up one or more changesets in CYCLE to make progress towards
1098 breaking the cycle. Update self.changeset_graph accordingly.
1100 It is not guaranteed that the cycle will be broken by one call to
1101 this routine, but at least some progress must be made."""
1103 self.processed_changeset_logger.flush()
1104 best_i = None
1105 best_link = None
1106 for i in range(len(cycle)):
1107 # It's OK if this index wraps to -1:
1108 link = ChangesetGraphLink(
1109 cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])
1111 if best_i is None or link < best_link:
1112 best_i = i
1113 best_link = link
1115 if logger.is_on(logger.DEBUG):
1116 logger.debug(
1117 'Breaking cycle %s by breaking node %x' % (
1118 ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
1119 best_link.changeset.id,))
1121 new_changesets = best_link.break_changeset(self.changeset_key_generator)
1123 self.changeset_graph.delete_changeset(best_link.changeset)
1125 for changeset in new_changesets:
1126 self.changeset_graph.add_new_changeset(changeset)
1128 def run(self, run_options, stats_keeper):
1129 logger.quiet("Breaking symbol changeset dependency cycles...")
1131 Ctx()._projects = read_projects(
1132 artifact_manager.get_temp_file(config.PROJECTS)
1134 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1135 Ctx()._symbol_db = SymbolDatabase()
1136 Ctx()._cvs_items_db = IndexedCVSItemStore(
1137 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1138 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1139 DB_OPEN_READ)
1141 shutil.copyfile(
1142 artifact_manager.get_temp_file(
1143 config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
1144 artifact_manager.get_temp_file(
1145 config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
1146 cvs_item_to_changeset_id = CVSItemToChangesetTable(
1147 artifact_manager.get_temp_file(
1148 config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
1149 DB_OPEN_WRITE)
1151 changeset_db = ChangesetDatabase(
1152 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
1153 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
1154 DB_OPEN_NEW)
1156 self.changeset_graph = ChangesetGraph(
1157 changeset_db, cvs_item_to_changeset_id
1160 max_changeset_id = 0
1161 for changeset in self.get_source_changesets():
1162 changeset_db.store(changeset)
1163 if isinstance(changeset, SymbolChangeset):
1164 self.changeset_graph.add_changeset(changeset)
1165 max_changeset_id = max(max_changeset_id, changeset.id)
1167 self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
1169 self.processed_changeset_logger = ProcessedChangesetLogger()
1171 # Consume the graph, breaking cycles using self.break_cycle():
1172 for (changeset, time_range) in self.changeset_graph.consume_graph(
1173 cycle_breaker=self.break_cycle
1175 self.processed_changeset_logger.log(changeset.id)
1177 self.processed_changeset_logger.flush()
1178 del self.processed_changeset_logger
1180 self.changeset_graph.close()
1181 self.changeset_graph = None
1182 Ctx()._cvs_items_db.close()
1183 Ctx()._symbol_db.close()
1184 Ctx()._cvs_path_db.close()
1186 logger.quiet("Done")
1189 class BreakAllChangesetCyclesPass(Pass):
1190 """Break up any dependency cycles that are closed by SymbolChangesets."""
1192 def register_artifacts(self):
1193 self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
1194 self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
1195 self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
1196 self._register_temp_file_needed(config.PROJECTS)
1197 self._register_temp_file_needed(config.SYMBOL_DB)
1198 self._register_temp_file_needed(config.CVS_PATHS_DB)
1199 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1200 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1201 self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
1202 self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
1203 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
1205 def get_source_changesets(self):
1206 old_changeset_db = ChangesetDatabase(
1207 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
1208 artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
1209 DB_OPEN_READ)
1211 changeset_ids = old_changeset_db.keys()
1213 for changeset_id in changeset_ids:
1214 yield old_changeset_db[changeset_id]
1216 old_changeset_db.close()
1218 def _split_retrograde_changeset(self, changeset):
1219 """CHANGESET is retrograde. Split it into non-retrograde changesets."""
1221 logger.debug('Breaking retrograde changeset %x' % (changeset.id,))
1223 self.changeset_graph.delete_changeset(changeset)
1225 # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
1226 ordinal_limits = {}
1227 for cvs_branch in changeset.iter_cvs_items():
1228 max_pred_ordinal = 0
1229 min_succ_ordinal = sys.maxint
1231 for pred_id in cvs_branch.get_pred_ids():
1232 pred_ordinal = self.ordinals.get(
1233 self.cvs_item_to_changeset_id[pred_id], 0)
1234 max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)
1236 for succ_id in cvs_branch.get_succ_ids():
1237 succ_ordinal = self.ordinals.get(
1238 self.cvs_item_to_changeset_id[succ_id], sys.maxint)
1239 min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)
1241 assert max_pred_ordinal < min_succ_ordinal
1242 ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)
1244 # Find the earliest successor ordinal:
1245 min_min_succ_ordinal = sys.maxint
1246 for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
1247 min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)
1249 early_item_ids = []
1250 late_item_ids = []
1251 for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
1252 if max_pred_ordinal >= min_min_succ_ordinal:
1253 late_item_ids.append(id)
1254 else:
1255 early_item_ids.append(id)
1257 assert early_item_ids
1258 assert late_item_ids
1260 early_changeset = changeset.create_split_changeset(
1261 self.changeset_key_generator.gen_id(), early_item_ids)
1262 late_changeset = changeset.create_split_changeset(
1263 self.changeset_key_generator.gen_id(), late_item_ids)
1265 self.changeset_graph.add_new_changeset(early_changeset)
1266 self.changeset_graph.add_new_changeset(late_changeset)
1268 early_split = self._split_if_retrograde(early_changeset.id)
1270 # Because of the way we constructed it, the early changeset should
1271 # not have to be split:
1272 assert not early_split
1274 self._split_if_retrograde(late_changeset.id)
1276 def _split_if_retrograde(self, changeset_id):
1277 node = self.changeset_graph[changeset_id]
1278 pred_ordinals = [
1279 self.ordinals[id]
1280 for id in node.pred_ids
1281 if id in self.ordinals
1283 pred_ordinals.sort()
1284 succ_ordinals = [
1285 self.ordinals[id]
1286 for id in node.succ_ids
1287 if id in self.ordinals
1289 succ_ordinals.sort()
1290 if pred_ordinals and succ_ordinals \
1291 and pred_ordinals[-1] >= succ_ordinals[0]:
1292 self._split_retrograde_changeset(self.changeset_db[node.id])
1293 return True
1294 else:
1295 return False
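# In other words (ordinals are hypothetical): a BranchChangeset is
# "retrograde" if, say, it depends on an OrderedChangeset with ordinal 12
# while an OrderedChangeset with ordinal 9 depends on it -- it would have to
# be committed both after ordinal 12 and before ordinal 9, which is
# impossible.  _split_retrograde_changeset() then carves it into an "early"
# and a "late" changeset whose ordinal constraints no longer overlap.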
1297 def break_segment(self, segment):
1298 """Break a changeset in SEGMENT[1:-1].
1300 The range SEGMENT[1:-1] is not empty, and all of the changesets in
1301 that range are SymbolChangesets."""
1303 best_i = None
1304 best_link = None
1305 for i in range(1, len(segment) - 1):
1306 link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])
1308 if best_i is None or link < best_link:
1309 best_i = i
1310 best_link = link
1312 if logger.is_on(logger.DEBUG):
1313 logger.debug(
1314 'Breaking segment %s by breaking node %x' % (
1315 ' -> '.join(['%x' % node.id for node in segment]),
1316 best_link.changeset.id,))
1318 new_changesets = best_link.break_changeset(self.changeset_key_generator)
1320 self.changeset_graph.delete_changeset(best_link.changeset)
1322 for changeset in new_changesets:
1323 self.changeset_graph.add_new_changeset(changeset)
1325 def break_cycle(self, cycle):
1326 """Break up one or more SymbolChangesets in CYCLE to help break the cycle.
1328 CYCLE is a list of SymbolChangesets where
1330 cycle[i] depends on cycle[i - 1]
1332 Break up one or more changesets in CYCLE to make progress
1333 towards breaking the cycle. Update self.changeset_graph
1334 accordingly.
1336 It is not guaranteed that the cycle will be broken by one call to
1337 this routine, but at least some progress must be made."""
1339 if logger.is_on(logger.DEBUG):
1340 logger.debug(
1341 'Breaking cycle %s' % (
1342 ' -> '.join(['%x' % changeset.id
1343 for changeset in cycle + [cycle[0]]]),))
1345 # Unwrap the cycle into a segment then break the segment:
1346 self.break_segment([cycle[-1]] + cycle + [cycle[0]])
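# Example of the unwrapping above (letters are hypothetical changesets): for
# the cycle [A, B, C] the segment passed to break_segment() is
# [C, A, B, C, A], so every member of the cycle appears as an interior
# element with both of its neighbours present, and break_segment() picks
# whichever interior changeset the ChangesetGraphLink ordering considers
# best to break.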
1348 def run(self, run_options, stats_keeper):
1349 logger.quiet("Breaking CVSSymbol dependency loops...")
1351 Ctx()._projects = read_projects(
1352 artifact_manager.get_temp_file(config.PROJECTS)
1354 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1355 Ctx()._symbol_db = SymbolDatabase()
1356 Ctx()._cvs_items_db = IndexedCVSItemStore(
1357 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1358 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1359 DB_OPEN_READ)
1361 shutil.copyfile(
1362 artifact_manager.get_temp_file(
1363 config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
1364 artifact_manager.get_temp_file(
1365 config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
1366 self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
1367 artifact_manager.get_temp_file(
1368 config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
1369 DB_OPEN_WRITE)
1371 self.changeset_db = ChangesetDatabase(
1372 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
1373 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
1374 DB_OPEN_NEW)
1376 self.changeset_graph = ChangesetGraph(
1377 self.changeset_db, self.cvs_item_to_changeset_id
1380 # A map {changeset_id : ordinal} for OrderedChangesets:
1381 self.ordinals = {}
1382 # A map {ordinal : changeset_id}:
1383 ordered_changeset_map = {}
1384 # A list of all BranchChangeset ids:
1385 branch_changeset_ids = []
1386 max_changeset_id = 0
1387 for changeset in self.get_source_changesets():
1388 self.changeset_db.store(changeset)
1389 self.changeset_graph.add_changeset(changeset)
1390 if isinstance(changeset, OrderedChangeset):
1391 ordered_changeset_map[changeset.ordinal] = changeset.id
1392 self.ordinals[changeset.id] = changeset.ordinal
1393 elif isinstance(changeset, BranchChangeset):
1394 branch_changeset_ids.append(changeset.id)
1395 max_changeset_id = max(max_changeset_id, changeset.id)
1397 # An array of ordered_changeset ids, indexed by ordinal:
1398 ordered_changesets = []
1399 for ordinal in range(len(ordered_changeset_map)):
1400 id = ordered_changeset_map[ordinal]
1401 ordered_changesets.append(id)
1403 ordered_changeset_ids = set(ordered_changeset_map.values())
1404 del ordered_changeset_map
1406 self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)
1408 # First we scan through all BranchChangesets looking for
1409 # changesets that are individually "retrograde" and splitting
1410 # those up:
1411 for changeset_id in branch_changeset_ids:
1412 self._split_if_retrograde(changeset_id)
1414 del self.ordinals
1416 next_ordered_changeset = 0
1418 self.processed_changeset_logger = ProcessedChangesetLogger()
1420 while self.changeset_graph:
1421 # Consume any nodes that don't have predecessors:
1422 for (changeset, time_range) \
1423 in self.changeset_graph.consume_nopred_nodes():
1424 self.processed_changeset_logger.log(changeset.id)
1425 if changeset.id in ordered_changeset_ids:
1426 next_ordered_changeset += 1
1427 ordered_changeset_ids.remove(changeset.id)
1429 self.processed_changeset_logger.flush()
1431 if not self.changeset_graph:
1432 break
1434 # Now work on the next ordered changeset that has not yet been
1435 # processed. BreakSymbolChangesetCyclesPass has broken any
1436 # cycles involving only SymbolChangesets, so the presence of a
1437 # cycle implies that there is at least one ordered changeset
1438 # left in the graph:
1439 assert next_ordered_changeset < len(ordered_changesets)
1441 id = ordered_changesets[next_ordered_changeset]
1442 path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
1443 if path:
1444 if logger.is_on(logger.DEBUG):
1445 logger.debug('Breaking path from %s to %s' % (path[0], path[-1],))
1446 self.break_segment(path)
1447 else:
1448 # There were no ordered changesets among the reachable
1449 # predecessors, so do generic cycle-breaking:
1450 if logger.is_on(logger.DEBUG):
1451 logger.debug(
1452 'Breaking generic cycle found from %s'
1453 % (self.changeset_db[id],)
1455 self.break_cycle(self.changeset_graph.find_cycle(id))
1457 del self.processed_changeset_logger
1458 self.changeset_graph.close()
1459 self.changeset_graph = None
1460 self.cvs_item_to_changeset_id = None
1461 self.changeset_db = None
1463 logger.quiet("Done")
1466 class TopologicalSortPass(Pass):
1467 """Sort changesets into commit order."""
1469 def register_artifacts(self):
1470 self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
1471 self._register_temp_file_needed(config.PROJECTS)
1472 self._register_temp_file_needed(config.SYMBOL_DB)
1473 self._register_temp_file_needed(config.CVS_PATHS_DB)
1474 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1475 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1476 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
1477 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
1478 self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
1480 def get_source_changesets(self, changeset_db):
1481 for changeset_id in changeset_db.keys():
1482 yield changeset_db[changeset_id]
1484 def get_changesets(self):
1485 """Generate (changeset, timestamp) pairs in commit order."""
1487 changeset_db = ChangesetDatabase(
1488 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
1489 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
1490 DB_OPEN_READ)
1492 changeset_graph = ChangesetGraph(
1493 changeset_db,
1494 CVSItemToChangesetTable(
1495 artifact_manager.get_temp_file(
1496 config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
1498 DB_OPEN_READ,
1501 symbol_changeset_ids = set()
1503 for changeset in self.get_source_changesets(changeset_db):
1504 changeset_graph.add_changeset(changeset)
1505 if isinstance(changeset, SymbolChangeset):
1506 symbol_changeset_ids.add(changeset.id)
1508 # Ensure a monotonically-increasing timestamp series by keeping
1509 # track of the previous timestamp and ensuring that the following
1510 # one is larger.
1511 timestamper = Timestamper()
1513 for (changeset, time_range) in changeset_graph.consume_graph():
1514 timestamp = timestamper.get(
1515 time_range.t_max, changeset.id in symbol_changeset_ids
1517 yield (changeset, timestamp)
1519 changeset_graph.close()
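# Concretely (timestamps hypothetical): if consecutive changesets have t_max
# values 1000, 1000, 997, the Timestamper returns an increasing series such
# as 1000, 1001, 1002, so the resulting Subversion revisions carry strictly
# increasing commit dates.  The second argument to get() flags symbol
# changesets, which have no commit time of their own in CVS and for which an
# adjusted timestamp is therefore presumably expected.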
1521 def run(self, run_options, stats_keeper):
1522 logger.quiet("Generating CVSRevisions in commit order...")
1524 Ctx()._projects = read_projects(
1525 artifact_manager.get_temp_file(config.PROJECTS)
1527 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1528 Ctx()._symbol_db = SymbolDatabase()
1529 Ctx()._cvs_items_db = IndexedCVSItemStore(
1530 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1531 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1532 DB_OPEN_READ)
1534 sorted_changesets = open(
1535 artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
1536 'w')
1538 for (changeset, timestamp) in self.get_changesets():
1539 sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))
1541 sorted_changesets.close()
1543 Ctx()._cvs_items_db.close()
1544 Ctx()._symbol_db.close()
1545 Ctx()._cvs_path_db.close()
1547 logger.quiet("Done")
1550 class CreateRevsPass(Pass):
1551 """Generate the SVNCommit <-> CVSRevision mapping databases.
1553 SVNCommitCreator also calls SymbolingsLogger to register
1554 CVSRevisions that represent an opening or closing for a path on a
1555 branch or tag. See SymbolingsLogger for more details.
1557 This pass was formerly known as pass5."""
1559 def register_artifacts(self):
1560 self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
1561 self._register_temp_file(config.SVN_COMMITS_STORE)
1562 self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
1563 self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
1564 self._register_temp_file_needed(config.PROJECTS)
1565 self._register_temp_file_needed(config.CVS_PATHS_DB)
1566 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1567 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1568 self._register_temp_file_needed(config.SYMBOL_DB)
1569 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
1570 self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
1571 self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)
1573 def get_changesets(self):
1574 """Generate (changeset,timestamp,) tuples in commit order."""
1576 changeset_db = ChangesetDatabase(
1577 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
1578 artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
1579 DB_OPEN_READ)
1581 for line in file(
1582 artifact_manager.get_temp_file(
1583 config.CHANGESETS_SORTED_DATAFILE)):
1584 [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
1585 yield (changeset_db[changeset_id], timestamp)
1587 changeset_db.close()
1589 def get_svn_commits(self, creator):
1590 """Generate the SVNCommits, in order."""
1592 for (changeset, timestamp) in self.get_changesets():
1593 for svn_commit in creator.process_changeset(changeset, timestamp):
1594 yield svn_commit
1596 def log_svn_commit(self, svn_commit):
1597 """Output information about SVN_COMMIT."""
1599 logger.normal(
1600 'Creating Subversion r%d (%s)'
1601 % (svn_commit.revnum, svn_commit.get_description(),)
1604 if isinstance(svn_commit, SVNRevisionCommit):
1605 for cvs_rev in svn_commit.cvs_revs:
1606 logger.verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))
1608 def run(self, run_options, stats_keeper):
1609 logger.quiet("Mapping CVS revisions to Subversion commits...")
1611 Ctx()._projects = read_projects(
1612 artifact_manager.get_temp_file(config.PROJECTS)
1614 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1615 Ctx()._symbol_db = SymbolDatabase()
1616 Ctx()._cvs_items_db = IndexedCVSItemStore(
1617 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1618 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1619 DB_OPEN_READ)
1621 Ctx()._symbolings_logger = SymbolingsLogger()
1623 persistence_manager = PersistenceManager(DB_OPEN_NEW)
1625 creator = SVNCommitCreator()
1626 for svn_commit in self.get_svn_commits(creator):
1627 self.log_svn_commit(svn_commit)
1628 persistence_manager.put_svn_commit(svn_commit)
1630 stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
1631 del creator
1633 persistence_manager.close()
1634 Ctx()._symbolings_logger.close()
1635 Ctx()._cvs_items_db.close()
1636 Ctx()._symbol_db.close()
1637 Ctx()._cvs_path_db.close()
1639 logger.quiet("Done")
1642 class SortSymbolOpeningsClosingsPass(Pass):
1643 """This pass was formerly known as pass6."""
1645 def register_artifacts(self):
1646 self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
1647 self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)
1649 def run(self, run_options, stats_keeper):
1650 logger.quiet("Sorting symbolic name source revisions...")
1652 def sort_key(line):
1653 line = line.split(' ', 2)
1654 return (int(line[0], 16), int(line[1]), line[2],)
1656 sort_file(
1657 artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
1658 artifact_manager.get_temp_file(
1659 config.SYMBOL_OPENINGS_CLOSINGS_SORTED
1661 key=sort_key,
1662 tempdirs=[Ctx().tmpdir],
1664 logger.quiet("Done")
1667 class IndexSymbolsPass(Pass):
1668 """This pass was formerly known as pass7."""
1670 def register_artifacts(self):
1671 self._register_temp_file(config.SYMBOL_OFFSETS_DB)
1672 self._register_temp_file_needed(config.PROJECTS)
1673 self._register_temp_file_needed(config.SYMBOL_DB)
1674 self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
1676 def generate_offsets_for_symbolings(self):
1677 """This function iterates through all the lines in
1678 SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
1679 SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
1680 where SYMBOLIC_NAME is first encountered. This will allow us to
1681 seek to the various offsets in the file and sequentially read only
1682 the openings and closings that we need."""
1684 offsets = {}
1686 f = open(
1687 artifact_manager.get_temp_file(
1688 config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
1689 'r')
1690 old_id = None
1691 while True:
1692 fpos = f.tell()
1693 line = f.readline()
1694 if not line:
1695 break
1696 id, svn_revnum, ignored = line.split(" ", 2)
1697 id = int(id, 16)
1698 if id != old_id:
1699 logger.verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
1700 old_id = id
1701 offsets[id] = fpos
1703 f.close()
1705 offsets_db = file(
1706 artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
1707 cPickle.dump(offsets, offsets_db, -1)
1708 offsets_db.close()
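# Illustration of the index being built (symbol ids, revision numbers and
# byte offsets are hypothetical).  If the sorted openings/closings file
# contains
#
#   a3 17 ...        <- byte offset 0
#   a3 19 ...
#   d4 12 ...        <- byte offset 26
#
# then offsets == {0xa3: 0, 0xd4: 26}: one entry per symbol, pointing at the
# first line for that symbol, so later passes can seek() directly to a
# symbol's block instead of scanning the whole file.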
1710 def run(self, run_options, stats_keeper):
1711 logger.quiet("Determining offsets for all symbolic names...")
1712 Ctx()._projects = read_projects(
1713 artifact_manager.get_temp_file(config.PROJECTS)
1715 Ctx()._symbol_db = SymbolDatabase()
1716 self.generate_offsets_for_symbolings()
1717 Ctx()._symbol_db.close()
1718 logger.quiet("Done.")
1721 class OutputPass(Pass):
1722 """This pass was formerly known as pass8."""
1724 def register_artifacts(self):
1725 self._register_temp_file_needed(config.PROJECTS)
1726 self._register_temp_file_needed(config.CVS_PATHS_DB)
1727 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
1728 self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
1729 self._register_temp_file_needed(config.SYMBOL_DB)
1730 self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
1731 self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
1732 self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
1733 self._register_temp_file_needed(config.SVN_COMMITS_STORE)
1734 self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
1735 Ctx().output_option.register_artifacts(self)
1737 def run(self, run_options, stats_keeper):
1738 Ctx()._projects = read_projects(
1739 artifact_manager.get_temp_file(config.PROJECTS)
1741 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1742 Ctx()._metadata_db = MetadataDatabase(
1743 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
1744 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
1745 DB_OPEN_READ,
1747 Ctx()._cvs_items_db = IndexedCVSItemStore(
1748 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1749 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1750 DB_OPEN_READ)
1751 Ctx()._symbol_db = SymbolDatabase()
1752 Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)
1754 Ctx().output_option.setup(stats_keeper.svn_rev_count())
1756 svn_revnum = 1
1757 svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
1758 while svn_commit:
1759 svn_commit.output(Ctx().output_option)
1760 svn_revnum += 1
1761 svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
1763 Ctx().output_option.cleanup()
1764 Ctx()._persistence_manager.close()
1766 Ctx()._symbol_db.close()
1767 Ctx()._cvs_items_db.close()
1768 Ctx()._metadata_db.close()
1769 Ctx()._cvs_path_db.close()
1772 # The list of passes constituting a run of cvs2svn:
1773 passes = [
1774 CollectRevsPass(),
1775 CleanMetadataPass(),
1776 CollateSymbolsPass(),
1777 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1778 FilterSymbolsPass(),
1779 SortRevisionsPass(),
1780 SortSymbolsPass(),
1781 InitializeChangesetsPass(),
1782 #CheckIndexedItemStoreDependenciesPass(
1783 # config.CVS_ITEMS_SORTED_STORE,
1784 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1785 BreakRevisionChangesetCyclesPass(),
1786 RevisionTopologicalSortPass(),
1787 BreakSymbolChangesetCyclesPass(),
1788 BreakAllChangesetCyclesPass(),
1789 TopologicalSortPass(),
1790 CreateRevsPass(),
1791 SortSymbolOpeningsClosingsPass(),
1792 IndexSymbolsPass(),
1793 OutputPass(),