Populate Ctx()._projects from CollectRevsPass.run().
[cvs2svn.git] / cvs2svn_lib / passes.py
blob1531544bbd637a8a6fbf4c5ee5d7b4153f4172a8
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
20 import sys
21 import shutil
22 import cPickle
24 from cvs2svn_lib import config
25 from cvs2svn_lib.context import Ctx
26 from cvs2svn_lib.common import warning_prefix
27 from cvs2svn_lib.common import FatalException
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import DB_OPEN_NEW
31 from cvs2svn_lib.common import DB_OPEN_READ
32 from cvs2svn_lib.common import DB_OPEN_WRITE
33 from cvs2svn_lib.common import Timestamper
34 from cvs2svn_lib.sort import sort_file
35 from cvs2svn_lib.log import logger
36 from cvs2svn_lib.pass_manager import Pass
37 from cvs2svn_lib.serializer import PrimedPickleSerializer
38 from cvs2svn_lib.artifact_manager import artifact_manager
39 from cvs2svn_lib.cvs_path_database import CVSPathDatabase
40 from cvs2svn_lib.metadata_database import MetadataDatabase
41 from cvs2svn_lib.project import read_projects
42 from cvs2svn_lib.project import write_projects
43 from cvs2svn_lib.symbol import LineOfDevelopment
44 from cvs2svn_lib.symbol import Trunk
45 from cvs2svn_lib.symbol import Symbol
46 from cvs2svn_lib.symbol import Branch
47 from cvs2svn_lib.symbol import Tag
48 from cvs2svn_lib.symbol import ExcludedSymbol
49 from cvs2svn_lib.symbol_database import SymbolDatabase
50 from cvs2svn_lib.symbol_database import create_symbol_database
51 from cvs2svn_lib.symbol_statistics import SymbolPlanError
52 from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
53 from cvs2svn_lib.symbol_statistics import SymbolStatistics
54 from cvs2svn_lib.cvs_item import CVSRevision
55 from cvs2svn_lib.cvs_item import CVSSymbol
56 from cvs2svn_lib.cvs_item_database import OldCVSItemStore
57 from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
58 from cvs2svn_lib.cvs_item_database import cvs_item_primer
59 from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib.key_generator import KeyGenerator
64 from cvs2svn_lib.changeset import RevisionChangeset
65 from cvs2svn_lib.changeset import OrderedChangeset
66 from cvs2svn_lib.changeset import SymbolChangeset
67 from cvs2svn_lib.changeset import BranchChangeset
68 from cvs2svn_lib.changeset import create_symbol_changeset
69 from cvs2svn_lib.changeset_graph import ChangesetGraph
70 from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
71 from cvs2svn_lib.changeset_database import ChangesetDatabase
72 from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
73 from cvs2svn_lib.svn_commit import SVNRevisionCommit
74 from cvs2svn_lib.openings_closings import SymbolingsLogger
75 from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
76 from cvs2svn_lib.persistence_manager import PersistenceManager
77 from cvs2svn_lib.collect_data import CollectData
78 from cvs2svn_lib.check_dependencies_pass \
79 import CheckItemStoreDependenciesPass
80 from cvs2svn_lib.check_dependencies_pass \
81 import CheckIndexedItemStoreDependenciesPass
class CollectRevsPass(Pass):
  """This pass was formerly known as pass1.

  Scan every CVS ',v' file in each project, recording projects, CVS
  paths, CVS items, metadata, and symbol statistics to temporary
  artifacts for use by later passes."""

  def register_artifacts(self):
    # Artifacts produced by this pass:
    self._register_temp_file(config.PROJECTS)
    self._register_temp_file(config.SYMBOL_STATISTICS)
    self._register_temp_file(config.METADATA_INDEX_TABLE)
    self._register_temp_file(config.METADATA_STORE)
    self._register_temp_file(config.CVS_PATHS_DB)
    self._register_temp_file(config.CVS_ITEMS_STORE)

  def run(self, run_options, stats_keeper):
    logger.quiet("Examining all CVS ',v' files...")
    Ctx()._projects = {}
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_NEW)
    cd = CollectData(stats_keeper)
    for project in run_options.projects:
      Ctx()._projects[project.id] = project
      cd.process_project(project)
    # The projects list is no longer needed; later passes read the
    # projects back from the PROJECTS artifact written below:
    run_options.projects = None

    fatal_errors = cd.close()

    if fatal_errors:
      raise FatalException("Pass 1 complete.\n"
                           + "=" * 75 + "\n"
                           + "Error summary:\n"
                           + "\n".join(fatal_errors) + "\n"
                           + "Exited due to fatal error(s).")

    Ctx()._cvs_path_db.close()
    write_projects(artifact_manager.get_temp_file(config.PROJECTS))
    logger.quiet("Done")
119 class CleanMetadataPass(Pass):
120 """Clean up CVS revision metadata and write it to a new database."""
122 def register_artifacts(self):
123 self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
124 self._register_temp_file(config.METADATA_CLEAN_STORE)
125 self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
126 self._register_temp_file_needed(config.METADATA_STORE)
128 def _get_clean_author(self, author):
129 """Return AUTHOR, converted appropriately to UTF8.
131 Raise a UnicodeException if it cannot be converted using the
132 configured cvs_author_decoder."""
134 try:
135 return self._authors[author]
136 except KeyError:
137 pass
139 try:
140 clean_author = Ctx().cvs_author_decoder(author)
141 except UnicodeError:
142 self._authors[author] = author
143 raise UnicodeError('Problem decoding author \'%s\'' % (author,))
145 try:
146 clean_author = clean_author.encode('utf8')
147 except UnicodeError:
148 self._authors[author] = author
149 raise UnicodeError('Problem encoding author \'%s\'' % (author,))
151 self._authors[author] = clean_author
152 return clean_author
154 def _get_clean_log_msg(self, log_msg):
155 """Return LOG_MSG, converted appropriately to UTF8.
157 Raise a UnicodeException if it cannot be converted using the
158 configured cvs_log_decoder."""
160 try:
161 clean_log_msg = Ctx().cvs_log_decoder(log_msg)
162 except UnicodeError:
163 raise UnicodeError(
164 'Problem decoding log message:\n'
165 '%s\n'
166 '%s\n'
167 '%s'
168 % ('-' * 75, log_msg, '-' * 75,)
171 try:
172 return clean_log_msg.encode('utf8')
173 except UnicodeError:
174 raise UnicodeError(
175 'Problem encoding log message:\n'
176 '%s\n'
177 '%s\n'
178 '%s'
179 % ('-' * 75, log_msg, '-' * 75,)
182 def _clean_metadata(self, metadata):
183 """Clean up METADATA by overwriting its members as necessary."""
185 try:
186 metadata.author = self._get_clean_author(metadata.author)
187 except UnicodeError, e:
188 logger.warn('%s: %s' % (warning_prefix, e,))
189 self.warnings = True
191 try:
192 metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
193 except UnicodeError, e:
194 logger.warn('%s: %s' % (warning_prefix, e,))
195 self.warnings = True
197 def run(self, run_options, stats_keeper):
198 logger.quiet("Converting metadata to UTF8...")
199 metadata_db = MetadataDatabase(
200 artifact_manager.get_temp_file(config.METADATA_STORE),
201 artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
202 DB_OPEN_READ,
204 metadata_clean_db = MetadataDatabase(
205 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
206 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
207 DB_OPEN_NEW,
210 self.warnings = False
212 # A map {author : clean_author} for those known (to avoid
213 # repeating warnings):
214 self._authors = {}
216 for id in metadata_db.iterkeys():
217 metadata = metadata_db[id]
219 # Record the original author name because it might be needed for
220 # expanding CVS keywords:
221 metadata.original_author = metadata.author
223 self._clean_metadata(metadata)
225 metadata_clean_db[id] = metadata
227 if self.warnings:
228 raise FatalError(
229 'There were warnings converting author names and/or log messages\n'
230 'to Unicode (see messages above). Please restart this pass\n'
231 'with one or more \'--encoding\' parameters or with\n'
232 '\'--fallback-encoding\'.'
235 metadata_clean_db.close()
236 metadata_db.close()
237 logger.quiet("Done")
240 class CollateSymbolsPass(Pass):
241 """Divide symbols into branches, tags, and excludes."""
243 conversion_names = {
244 Trunk : 'trunk',
245 Branch : 'branch',
246 Tag : 'tag',
247 ExcludedSymbol : 'exclude',
248 Symbol : '.',
251 def register_artifacts(self):
252 self._register_temp_file(config.SYMBOL_DB)
253 self._register_temp_file_needed(config.PROJECTS)
254 self._register_temp_file_needed(config.SYMBOL_STATISTICS)
256 def get_symbol(self, run_options, stats):
257 """Use StrategyRules to decide what to do with a symbol.
259 STATS is an instance of symbol_statistics._Stats describing an
260 instance of Symbol or Trunk. To determine how the symbol is to be
261 converted, consult the StrategyRules in the project's
262 symbol_strategy_rules. Each rule is allowed a chance to change
263 the way the symbol will be converted. If the symbol is not a
264 Trunk or TypedSymbol after all rules have run, raise
265 IndeterminateSymbolException."""
267 symbol = stats.lod
268 rules = run_options.project_symbol_strategy_rules[symbol.project.id]
269 for rule in rules:
270 symbol = rule.get_symbol(symbol, stats)
271 assert symbol is not None
273 stats.check_valid(symbol)
275 return symbol
277 def log_symbol_summary(self, stats, symbol):
278 if not self.symbol_info_file:
279 return
281 if isinstance(symbol, Trunk):
282 name = '.trunk.'
283 preferred_parent_name = '.'
284 else:
285 name = stats.lod.name
286 if symbol.preferred_parent_id is None:
287 preferred_parent_name = '.'
288 else:
289 preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
290 if isinstance(preferred_parent, Trunk):
291 preferred_parent_name = '.trunk.'
292 else:
293 preferred_parent_name = preferred_parent.name
295 if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
296 symbol_path = symbol.base_path
297 else:
298 symbol_path = '.'
300 self.symbol_info_file.write(
301 '%-5d %-30s %-10s %s %s\n' % (
302 stats.lod.project.id,
303 name,
304 self.conversion_names[symbol.__class__],
305 symbol_path,
306 preferred_parent_name,
309 self.symbol_info_file.write(' # %s\n' % (stats,))
310 parent_counts = stats.possible_parents.items()
311 if parent_counts:
312 self.symbol_info_file.write(' # Possible parents:\n')
313 parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
314 for (pp, count) in parent_counts:
315 if isinstance(pp, Trunk):
316 self.symbol_info_file.write(
317 ' # .trunk. : %d\n' % (count,)
319 else:
320 self.symbol_info_file.write(
321 ' # %s : %d\n' % (pp.name, count,)
324 def get_symbols(self, run_options):
325 """Return a map telling how to convert symbols.
327 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
328 indicating how each symbol should be converted. Trunk objects in
329 SYMBOL_STATS are passed through unchanged. One object is included
330 in the return value for each line of development described in
331 SYMBOL_STATS.
333 Raise FatalError if there was an error."""
335 errors = []
336 mismatches = []
338 if Ctx().symbol_info_filename is not None:
339 self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
340 self.symbol_info_file.write(
341 '# Columns: project_id symbol_name conversion symbol_path '
342 'preferred_parent_name\n'
344 else:
345 self.symbol_info_file = None
347 # Initialize each symbol strategy rule a single time, even if it
348 # is used in more than one project. First define a map from
349 # object id to symbol strategy rule:
350 rules = {}
351 for rule_list in run_options.project_symbol_strategy_rules:
352 for rule in rule_list:
353 rules[id(rule)] = rule
355 for rule in rules.itervalues():
356 rule.start(self.symbol_stats)
358 retval = {}
360 for stats in self.symbol_stats:
361 try:
362 symbol = self.get_symbol(run_options, stats)
363 except IndeterminateSymbolException, e:
364 self.log_symbol_summary(stats, stats.lod)
365 mismatches.append(e.stats)
366 except SymbolPlanError, e:
367 self.log_symbol_summary(stats, stats.lod)
368 errors.append(e)
369 else:
370 self.log_symbol_summary(stats, symbol)
371 retval[stats.lod] = symbol
373 for rule in rules.itervalues():
374 rule.finish()
376 if self.symbol_info_file:
377 self.symbol_info_file.close()
379 del self.symbol_info_file
381 if errors or mismatches:
382 s = ['Problems determining how symbols should be converted:\n']
383 for e in errors:
384 s.append('%s\n' % (e,))
385 if mismatches:
386 s.append(
387 'It is not clear how the following symbols '
388 'should be converted.\n'
389 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
390 'and/or\n'
391 '--symbol-default to resolve the ambiguity.\n'
393 for stats in mismatches:
394 s.append(' %s\n' % (stats,))
395 raise FatalError(''.join(s))
396 else:
397 return retval
399 def run(self, run_options, stats_keeper):
400 Ctx()._projects = read_projects(
401 artifact_manager.get_temp_file(config.PROJECTS)
403 self.symbol_stats = SymbolStatistics(
404 artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
407 symbol_map = self.get_symbols(run_options)
409 # Check the symbols for consistency and bail out if there were errors:
410 self.symbol_stats.check_consistency(symbol_map)
412 # Check that the symbols all have SVN paths set and that the paths
413 # are disjoint:
414 Ctx().output_option.check_symbols(symbol_map)
416 for symbol in symbol_map.itervalues():
417 if isinstance(symbol, ExcludedSymbol):
418 self.symbol_stats.exclude_symbol(symbol)
420 create_symbol_database(symbol_map.values())
422 del self.symbol_stats
424 logger.quiet("Done")
class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    self._register_temp_file(config.ITEM_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    # The configured revision collector may need artifacts of its own:
    Ctx().revision_collector.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))

    # Persist the serializer so that later passes can deserialize the
    # items written to the datafiles below:
    cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
    f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'wb')
    cPickle.dump(cvs_item_serializer, f, -1)
    f.close()

    rev_db = NewSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
        cvs_item_serializer,
        )

    symbol_db = NewSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
        cvs_item_serializer,
        )

    revision_collector = Ctx().revision_collector

    logger.quiet("Filtering out excluded symbols and summarizing items...")

    stats_keeper.reset_cvs_rev_info()
    revision_collector.start()

    # Process the cvs items store one file at a time:
    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      logger.verbose(cvs_file_items.cvs_file.filename)
      cvs_file_items.filter_excluded_symbols()
      cvs_file_items.mutate_symbols()
      cvs_file_items.adjust_parents()
      cvs_file_items.refine_symbols()
      cvs_file_items.determine_revision_properties(
          Ctx().revision_property_setters
          )
      cvs_file_items.record_opened_symbols()
      cvs_file_items.record_closed_symbols()
      cvs_file_items.check_link_consistency()

      # Give the revision collector a chance to collect data about the
      # file:
      revision_collector.process_file(cvs_file_items)

      # Store whatever is left to the new file and update statistics:
      stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
      for cvs_item in cvs_file_items.values():
        stats_keeper.record_cvs_item(cvs_item)

        if isinstance(cvs_item, CVSRevision):
          rev_db.add(cvs_item)
        elif isinstance(cvs_item, CVSSymbol):
          symbol_db.add(cvs_item)

    stats_keeper.set_stats_reflect_exclude(True)

    rev_db.close()
    symbol_db.close()
    revision_collector.finish()
    cvs_item_store.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class SortRevisionsPass(Pass):
  """Sort the revisions file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_DATAFILE)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting CVS revision summaries...")
    # External sort; intermediate files go in the conversion's tmpdir:
    sort_file(
        artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_REVS_SORTED_DATAFILE
            ),
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class SortSymbolsPass(Pass):
  """Sort the symbols file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting CVS symbol summaries...")
    # External sort; intermediate files go in the conversion's tmpdir:
    sort_file(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SORTED_DATAFILE
            ),
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
    self._register_temp_file(config.CHANGESETS_STORE)
    self._register_temp_file(config.CHANGESETS_INDEX)
    self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.ITEM_SERIALIZER)
    self._register_temp_file_needed(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(
        config.CVS_SYMBOLS_SORTED_DATAFILE)

  def get_revision_changesets(self):
    """Generate revision changesets, one at a time.

    Each time, yield a list of CVSRevisions that might potentially
    consititute a changeset."""

    # Create changesets for CVSRevisions:
    old_metadata_id = None
    old_timestamp = None
    changeset_items = []

    db = OldSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(
            config.CVS_REVS_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_rev in db:
      # A new changeset starts whenever the commit metadata changes or
      # there is a gap of more than COMMIT_THRESHOLD between
      # consecutive revisions:
      if cvs_rev.metadata_id != old_metadata_id \
         or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_metadata_id = cvs_rev.metadata_id
      changeset_items.append(cvs_rev)
      old_timestamp = cvs_rev.timestamp

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  def get_symbol_changesets(self):
    """Generate symbol changesets, one at a time.

    Each time, yield a list of CVSSymbols that might potentially
    consititute a changeset."""

    old_symbol_id = None
    changeset_items = []

    db = OldSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_symbol in db:
      if cvs_symbol.symbol.id != old_symbol_id:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_symbol_id = cvs_symbol.symbol.id
      changeset_items.append(cvs_symbol)

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  @staticmethod
  def compare_items(a, b):
    # Order CVSRevisions chronologically, breaking ties by path, then
    # by CVS revision number, then by id:
    return (
        cmp(a.timestamp, b.timestamp)
        or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
        or cmp([int(x) for x in a.rev.split('.')],
               [int(x) for x in b.rev.split('.')])
        or cmp(a.id, b.id))

  def break_internal_dependencies(self, changeset_items):
    """Split up CHANGESET_ITEMS if necessary to break internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could possibly
    belong in a single RevisionChangeset, but there might be internal
    dependencies among the items.  Return a list of lists, where each
    sublist is a list of CVSRevisions and at least one internal
    dependency has been eliminated.  Iff CHANGESET_ITEMS does not have
    to be split, then the return value will contain a single value,
    namely the original value of CHANGESET_ITEMS.  Split
    CHANGESET_ITEMS at most once, even though the resulting changesets
    might themselves have internal dependencies."""

    # We only look for succ dependencies, since by doing so we
    # automatically cover pred dependencies as well.  First create a
    # list of tuples (pred, succ) of id pairs for CVSItems that depend
    # on each other.
    dependencies = []
    changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
    for cvs_item in changeset_items:
      for next_id in cvs_item.get_succ_ids():
        if next_id in changeset_cvs_item_ids:
          # Sanity check: a CVSItem should never depend on itself:
          if next_id == cvs_item.id:
            raise InternalError('Item depends on itself: %s' % (cvs_item,))

          dependencies.append((cvs_item.id, next_id,))

    if dependencies:
      # Sort the changeset_items in a defined order (chronological to the
      # extent that the timestamps are correct and unique).
      changeset_items.sort(self.compare_items)
      indexes = {}
      for (i, changeset_item) in enumerate(changeset_items):
        indexes[changeset_item.id] = i

      # How many internal dependencies would be broken by breaking the
      # Changeset after a particular index?
      breaks = [0] * len(changeset_items)
      for (pred, succ,) in dependencies:
        pred_index = indexes[pred]
        succ_index = indexes[succ]
        breaks[min(pred_index, succ_index)] += 1
        breaks[max(pred_index, succ_index)] -= 1
      for i in range(1, len(breaks)):
        breaks[i] += breaks[i - 1]

      # Choose the split point that breaks the most dependencies,
      # preferring the smallest timestamp gap as a tie-breaker:
      best_i = None
      best_count = -1
      best_time = 0
      for i in range(0, len(breaks) - 1):
        if breaks[i] > best_count:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)
        elif breaks[i] == best_count \
             and (changeset_items[i + 1].timestamp
                  - changeset_items[i].timestamp) < best_time:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)

      # Reuse the old changeset.id for the first of the split changesets.
      return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
    else:
      return [changeset_items]

  def break_all_internal_dependencies(self, changeset_items):
    """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
    be part of a single changeset.  Break this list into sublists,
    where the CVSRevisions in each sublist are free of mutual
    dependencies."""

    # This method is written non-recursively to avoid any possible
    # problems with recursion depth.

    changesets_to_split = [changeset_items]
    while changesets_to_split:
      changesets = self.break_internal_dependencies(changesets_to_split.pop())
      if len(changesets) == 1:
        [changeset_items] = changesets
        yield changeset_items
      else:
        # The changeset had to be split; see if either of the
        # fragments have to be split:
        changesets.reverse()
        changesets_to_split.extend(changesets)

  def get_changesets(self):
    """Generate (Changeset, [CVSItem,...]) for all changesets.

    The Changesets already have their internal dependencies broken.
    The [CVSItem,...] list is the list of CVSItems in the
    corresponding Changeset."""

    for changeset_items in self.get_revision_changesets():
      for split_changeset_items \
              in self.break_all_internal_dependencies(changeset_items):
        yield (
            RevisionChangeset(
                self.changeset_key_generator.gen_id(),
                [cvs_rev.id for cvs_rev in split_changeset_items]
                ),
            split_changeset_items,
            )

    for changeset_items in self.get_symbol_changesets():
      yield (
          create_symbol_changeset(
              self.changeset_key_generator.gen_id(),
              changeset_items[0].symbol,
              [cvs_symbol.id for cvs_symbol in changeset_items]
              ),
          changeset_items,
          )

  def run(self, run_options, stats_keeper):
    logger.quiet("Creating preliminary commit sets...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    # Reload the serializer that FilterSymbolsPass pickled:
    f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'rb')
    self.cvs_item_serializer = cPickle.load(f)
    f.close()

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_NEW,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        DB_OPEN_NEW,
        )

    self.sorted_cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_NEW)

    self.changeset_key_generator = KeyGenerator()

    for (changeset, changeset_items) in self.get_changesets():
      if logger.is_on(logger.DEBUG):
        logger.debug(repr(changeset))
      changeset_db.store(changeset)
      for cvs_item in changeset_items:
        self.sorted_cvs_items_db.add(cvs_item)
        cvs_item_to_changeset_id[cvs_item.id] = changeset.id

    self.sorted_cvs_items_db.close()
    cvs_item_to_changeset_id.close()
    changeset_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    del self.cvs_item_serializer

    logger.quiet("Done")
class ProcessedChangesetLogger:
  """Accumulate processed changeset ids and emit them in batched DEBUG lines."""

  def __init__(self):
    # Ids recorded since the last flush():
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    # Only bother accumulating if DEBUG output is enabled:
    if logger.is_on(logger.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    if self.processed_changeset_ids:
      logger.debug(
          'Consumed changeset ids %s'
          % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))

      del self.processed_changeset_ids[:]
class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_STORE)
    self._register_temp_file_needed(config.CHANGESETS_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)

  def get_source_changesets(self):
    """Yield every changeset stored by the previous pass."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()
    del old_changeset_db

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Find the best node to break, as judged by ChangesetGraphLink's
    # ordering:
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking revision changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the REVBROKEN table from the previous pass's table, then
    # open it for in-place updating:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, RevisionChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # New changesets created by cycle-breaking get ids above any
    # existing changeset id:
    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  Also convert them to OrderedChangesets, without changing their ids."""

  def register_artifacts(self):
    # Outputs:
    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    # Inputs:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self, changeset_db):
    """Yield every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate the changesets to be stored by this pass.

    Changesets that are not RevisionChangesets are passed through
    unchanged.  RevisionChangesets are consumed from the dependency
    graph in topological order and re-emitted as OrderedChangesets
    (same ids) carrying their ordinal and neighbor ids."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_READ,
        )

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN
                ),
            DB_OPEN_READ,
            )
        )

    # RevisionChangesets go into the graph to be sorted; anything else
    # is yielded right away:
    for changeset in self.get_source_changesets(changeset_db):
      if isinstance(changeset, RevisionChangeset):
        changeset_graph.add_changeset(changeset)
      else:
        yield changeset

    # Changeset ids in commit order, bracketed by None sentries so that
    # the first and last real entries have a "neighbor" on both sides:
    sorted_ids = [None]
    for (changeset, time_range) in changeset_graph.consume_graph():
      sorted_ids.append(changeset.id)
    sorted_ids.append(None)

    for idx in range(1, len(sorted_ids) - 1):
      changeset = changeset_db[sorted_ids[idx]]
      yield OrderedChangeset(
          changeset.id, changeset.cvs_item_ids, idx - 1,
          sorted_ids[idx - 1], sorted_ids[idx + 1])

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    changesets_revordered_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW)

    for changeset in self.get_changesets():
      changesets_revordered_db.store(changeset)

    changesets_revordered_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    # Outputs:
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    # Inputs:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Yield every changeset from the REVSORTED changeset database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    for changeset_id in old_changeset_db.keys():
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle, and update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Pick the cheapest node to break, as judged by ChangesetGraphLink
    # ordering:
    best_link = None
    for i in range(len(cycle)):
      # Indexing with i - 1 deliberately wraps around to the end of the
      # list when i == 0:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_link is None or link < best_link:
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the SYMBROKEN item-to-changeset table with a copy of the
    # REVBROKEN one; it is then updated in place as changesets are split:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    # Every changeset is stored, but only SymbolChangesets take part in
    # the cycle-breaking graph:
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    # Outputs:
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    # Inputs:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Yield every changeset from the SYMBROKEN changeset database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    for changeset_id in old_changeset_db.keys():
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets."""

    logger.debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      # Items whose changesets have no recorded ordinal default to 0
      # (predecessors) / sys.maxint (successors), i.e. they impose no
      # constraint:
      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal over all items:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Partition the items into those that may be committed before that
    # ordinal ("early") and those that may not ("late"):
    early_item_ids = []
    late_item_ids = []
    for (item_id, (max_pred_ordinal, min_succ_ordinal)) \
            in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(item_id)
      else:
        early_item_ids.append(item_id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split up the changeset if it is retrograde.

    A changeset is retrograde if its latest ordered predecessor does
    not precede its earliest ordered successor.  Return True iff a
    split was carried out."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[pred_id]
        for pred_id in node.pred_ids
        if pred_id in self.ordinals
        ]
    succ_ordinals = [
        self.ordinals[succ_id]
        for succ_id in node.succ_ids
        if succ_id in self.ordinals
        ]
    if pred_ordinals and succ_ordinals \
           and max(pred_ordinals) >= min(succ_ordinals):
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    # Pick the cheapest interior node to break, as judged by
    # ChangesetGraphLink ordering:
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_link is None or link < best_link:
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle, and update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the ALLBROKEN item-to-changeset table with a copy of the
    # SYMBROKEN one; it is then updated in place as changesets are split:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = [
        ordered_changeset_map[ordinal]
        for ordinal in range(len(ordered_changeset_map))
        ]

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    del self.ordinals

    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      changeset_id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(
          changeset_id, ordered_changeset_ids)
      if path:
        if logger.is_on(logger.DEBUG):
          logger.debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if logger.is_on(logger.DEBUG):
          logger.debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[changeset_id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(changeset_id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    logger.quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    # Output:
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    # Inputs:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Yield every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            )
        )

    # Remember which changesets are SymbolChangesets while loading the
    # graph:
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Timestamper guarantees a monotonically-increasing timestamp
    # series by remembering the previous timestamp and forcing each
    # following one to be larger:
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Record "CHANGESET_ID_HEX TIMESTAMP_HEX" lines, one per changeset,
    # in commit order:
    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    # Outputs:
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    # Inputs:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order.

    The ids and timestamps (both hexadecimal) are read from the sorted
    datafile written by TopologicalSortPass; the changesets themselves
    are looked up in the ALLBROKEN changeset database."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    # Use open() rather than the deprecated file() constructor, and
    # close the file explicitly once it has been exhausted instead of
    # leaving it for the garbage collector:
    sorted_datafile = open(
        artifact_manager.get_temp_file(
            config.CHANGESETS_SORTED_DATAFILE))
    for line in sorted_datafile:
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)
    sorted_datafile.close()

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    logger.normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        logger.verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    logger.quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
    del creator

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class SortSymbolOpeningsClosingsPass(Pass):
  """Sort the symbol openings/closings file.

  This pass was formerly known as pass6."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting symbolic name source revisions...")

    def sort_key(line):
      # The first field is parsed as a hexadecimal symbol id and the
      # second as a decimal number; the rest of the line is compared
      # lexicographically:
      fields = line.split(' ', 2)
      return (int(fields[0], 16), int(fields[1]), fields[2],)

    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED
            ),
        key=sort_key,
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class IndexSymbolsPass(Pass):
  """This pass was formerly known as pass7."""

  def register_artifacts(self):
    # Output:
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    # Inputs:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    # A map { symbol_id : offset of the symbol's first line }:
    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    old_id = None
    while True:
      # Record the offset *before* reading the line, so that seeking to
      # it later lands at the start of the symbol's first line:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
      # Renamed from 'id' to avoid shadowing the builtin:
      symbol_id, svn_revnum, ignored = line.split(" ", 2)
      symbol_id = int(symbol_id, 16)
      if symbol_id != old_id:
        # First line for this symbol (the input is sorted by symbol):
        logger.verbose(' ', Ctx()._symbol_db.get_symbol(symbol_id).name)
        old_id = symbol_id
        offsets[symbol_id] = fpos

    f.close()

    # Use open() rather than the deprecated file() constructor.  -1
    # selects the highest available pickle protocol:
    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    cPickle.dump(offsets, offsets_db, -1)
    offsets_db.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    logger.quiet("Done.")
class OutputPass(Pass):
  """Stream every SVNCommit, in revision order, to Ctx().output_option.

  This pass was formerly known as pass8."""

  def register_artifacts(self):
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    # The output option may need artifacts of its own:
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # Fetch and emit commits for revisions 1, 2, ... until the
    # persistence manager yields a falsy value:
    svn_revnum = 1
    while True:
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
      if not svn_commit:
        break
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_path_db.close()
1759 # The list of passes constituting a run of cvs2svn:
1760 passes = [
1761 CollectRevsPass(),
1762 CleanMetadataPass(),
1763 CollateSymbolsPass(),
1764 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1765 FilterSymbolsPass(),
1766 SortRevisionsPass(),
1767 SortSymbolsPass(),
1768 InitializeChangesetsPass(),
1769 #CheckIndexedItemStoreDependenciesPass(
1770 # config.CVS_ITEMS_SORTED_STORE,
1771 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1772 BreakRevisionChangesetCyclesPass(),
1773 RevisionTopologicalSortPass(),
1774 BreakSymbolChangesetCyclesPass(),
1775 BreakAllChangesetCyclesPass(),
1776 TopologicalSortPass(),
1777 CreateRevsPass(),
1778 SortSymbolOpeningsClosingsPass(),
1779 IndexSymbolsPass(),
1780 OutputPass(),