Add a way to specify the MimeMapper mappings to its constructor directly.
[cvs2svn.git] / cvs2svn_lib / passes.py
blob8381d5ed620f0699712cb2187de894d1f406e01f
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
20 import sys
21 import shutil
22 import cPickle
24 from cvs2svn_lib import config
25 from cvs2svn_lib.context import Ctx
26 from cvs2svn_lib.common import warning_prefix
27 from cvs2svn_lib.common import FatalException
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import DB_OPEN_NEW
31 from cvs2svn_lib.common import DB_OPEN_READ
32 from cvs2svn_lib.common import DB_OPEN_WRITE
33 from cvs2svn_lib.common import Timestamper
34 from cvs2svn_lib.sort import sort_file
35 from cvs2svn_lib.log import Log
36 from cvs2svn_lib.pass_manager import Pass
37 from cvs2svn_lib.serializer import PrimedPickleSerializer
38 from cvs2svn_lib.artifact_manager import artifact_manager
39 from cvs2svn_lib.cvs_path_database import CVSPathDatabase
40 from cvs2svn_lib.metadata_database import MetadataDatabase
41 from cvs2svn_lib.project import read_projects
42 from cvs2svn_lib.project import write_projects
43 from cvs2svn_lib.symbol import LineOfDevelopment
44 from cvs2svn_lib.symbol import Trunk
45 from cvs2svn_lib.symbol import Symbol
46 from cvs2svn_lib.symbol import Branch
47 from cvs2svn_lib.symbol import Tag
48 from cvs2svn_lib.symbol import ExcludedSymbol
49 from cvs2svn_lib.symbol_database import SymbolDatabase
50 from cvs2svn_lib.symbol_database import create_symbol_database
51 from cvs2svn_lib.symbol_statistics import SymbolPlanError
52 from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
53 from cvs2svn_lib.symbol_statistics import SymbolStatistics
54 from cvs2svn_lib.cvs_item import CVSRevision
55 from cvs2svn_lib.cvs_item import CVSSymbol
56 from cvs2svn_lib.cvs_item_database import OldCVSItemStore
57 from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
58 from cvs2svn_lib.cvs_item_database import cvs_item_primer
59 from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib.key_generator import KeyGenerator
64 from cvs2svn_lib.changeset import RevisionChangeset
65 from cvs2svn_lib.changeset import OrderedChangeset
66 from cvs2svn_lib.changeset import SymbolChangeset
67 from cvs2svn_lib.changeset import BranchChangeset
68 from cvs2svn_lib.changeset import create_symbol_changeset
69 from cvs2svn_lib.changeset_graph import ChangesetGraph
70 from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
71 from cvs2svn_lib.changeset_database import ChangesetDatabase
72 from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
73 from cvs2svn_lib.svn_commit import SVNRevisionCommit
74 from cvs2svn_lib.openings_closings import SymbolingsLogger
75 from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
76 from cvs2svn_lib.persistence_manager import PersistenceManager
77 from cvs2svn_lib.collect_data import CollectData
78 from cvs2svn_lib.check_dependencies_pass \
79 import CheckItemStoreDependenciesPass
80 from cvs2svn_lib.check_dependencies_pass \
81 import CheckIndexedItemStoreDependenciesPass
class CollectRevsPass(Pass):
  """This pass was formerly known as pass1.

  It feeds every project in the run options through a CollectData
  instance and then writes the project list to a temporary file."""

  def register_artifacts(self):
    """Register the temporary files that this pass creates."""

    for temp_file in (
          config.PROJECTS,
          config.SYMBOL_STATISTICS,
          config.METADATA_INDEX_TABLE,
          config.METADATA_STORE,
          config.CVS_PATHS_DB,
          config.CVS_ITEMS_STORE,
          ):
      self._register_temp_file(temp_file)

  def run(self, run_options, stats_keeper):
    """Process all projects; raise FatalException on fatal errors.

    RUN_OPTIONS.projects is consumed and then reset to None."""

    Log().quiet("Examining all CVS ',v' files...")
    Ctx()._projects = {}
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_NEW)

    collector = CollectData(stats_keeper)
    for project in run_options.projects:
      collector.process_project(project)
    run_options.projects = None

    # close() reports the fatal errors that were accumulated, if any:
    fatal_errors = collector.close()
    if fatal_errors:
      summary = ''.join([
          "Pass 1 complete.\n",
          "=" * 75, "\n",
          "Error summary:\n",
          "\n".join(fatal_errors), "\n",
          "Exited due to fatal error(s).",
          ])
      raise FatalException(summary)

    Ctx()._cvs_path_db.close()
    write_projects(artifact_manager.get_temp_file(config.PROJECTS))
    Log().quiet("Done")
class CleanMetadataPass(Pass):
  """Clean up CVS revision metadata and write it to a new database."""

  def register_artifacts(self):
    """Register the files that this pass creates and the ones it needs."""

    self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_STORE)

  def _get_clean_author(self, author):
    """Return AUTHOR, converted appropriately to UTF8.

    Results are cached in self._authors.  If a conversion fails, the
    unconverted AUTHOR is cached, so that the problem is reported only
    the first time that AUTHOR is seen.

    Raise a UnicodeError if it cannot be converted using the
    configured cvs_author_decoder."""

    try:
      return self._authors[author]
    except KeyError:
      pass

    try:
      clean_author = Ctx().cvs_author_decoder(author)
    except UnicodeError:
      self._authors[author] = author
      raise UnicodeError('Problem decoding author \'%s\'' % (author,))

    try:
      clean_author = clean_author.encode('utf8')
    except UnicodeError:
      self._authors[author] = author
      raise UnicodeError('Problem encoding author \'%s\'' % (author,))

    self._authors[author] = clean_author
    return clean_author

  def _get_clean_log_msg(self, log_msg):
    """Return LOG_MSG, converted appropriately to UTF8.

    Raise a UnicodeError if it cannot be converted using the
    configured cvs_log_decoder."""

    try:
      clean_log_msg = Ctx().cvs_log_decoder(log_msg)
    except UnicodeError:
      raise UnicodeError(
          'Problem decoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

    try:
      return clean_log_msg.encode('utf8')
    except UnicodeError:
      raise UnicodeError(
          'Problem encoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

  def _clean_metadata(self, metadata):
    """Clean up METADATA by overwriting its members as necessary.

    Conversion problems are logged as warnings and recorded in
    self.warnings; the affected member is left unchanged."""

    try:
      metadata.author = self._get_clean_author(metadata.author)
    except UnicodeError:
      # sys.exc_info() is used instead of "except ..., e" so that the
      # module parses under both old and new Python versions:
      e = sys.exc_info()[1]
      Log().warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

    try:
      metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
    except UnicodeError:
      e = sys.exc_info()[1]
      Log().warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

  def run(self, run_options, stats_keeper):
    """Copy all metadata records to the clean database, as UTF8.

    Raise FatalError if any author or log message could not be
    converted.  Both databases are closed even if the pass fails."""

    Log().quiet("Converting metadata to UTF8...")
    metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_STORE),
        artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
        DB_OPEN_READ,
        )

    metadata_clean_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_NEW,
        )

    self.warnings = False

    # A map {author : clean_author} for those known (to avoid
    # repeating warnings):
    self._authors = {}

    try:
      for metadata_id in metadata_db.iterkeys():
        metadata = metadata_db[metadata_id]

        # Record the original author name because it might be needed for
        # expanding CVS keywords:
        metadata.original_author = metadata.author

        self._clean_metadata(metadata)

        metadata_clean_db[metadata_id] = metadata

      if self.warnings:
        raise FatalError(
            'There were warnings converting author names and/or log messages\n'
            'to Unicode (see messages above). Please restart this pass\n'
            'with one or more \'--encoding\' parameters or with\n'
            '\'--fallback-encoding\'.'
            )
    finally:
      # Release the database files on the failure path, too:
      metadata_clean_db.close()
      metadata_db.close()

    Log().quiet("Done")
class CollateSymbolsPass(Pass):
  """Divide symbols into branches, tags, and excludes."""

  # The word written to the symbol info file for each kind of symbol:
  conversion_names = {
      Trunk : 'trunk',
      Branch : 'branch',
      Tag : 'tag',
      ExcludedSymbol : 'exclude',
      Symbol : '.',
      }

  def register_artifacts(self):
    """Register the files that this pass creates and the ones it needs."""

    self._register_temp_file(config.SYMBOL_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_STATISTICS)

  def get_symbol(self, run_options, stats):
    """Use StrategyRules to decide what to do with a symbol.

    STATS is an instance of symbol_statistics._Stats describing an
    instance of Symbol or Trunk.  To determine how the symbol is to be
    converted, consult the StrategyRules in the project's
    symbol_strategy_rules.  Each rule is allowed a chance to change
    the way the symbol will be converted.  If the symbol is not a
    Trunk or TypedSymbol after all rules have run, raise
    IndeterminateSymbolException."""

    symbol = stats.lod
    rules = run_options.project_symbol_strategy_rules[symbol.project.id]
    for rule in rules:
      symbol = rule.get_symbol(symbol, stats)
      assert symbol is not None

    stats.check_valid(symbol)

    return symbol

  def log_symbol_summary(self, stats, symbol):
    """Write a summary of SYMBOL to the symbol info file, if there is one.

    STATS describes the line of development and SYMBOL is the object
    that it is to be converted to.  Do nothing if
    self.symbol_info_file is not set."""

    if not self.symbol_info_file:
      return

    if isinstance(symbol, Trunk):
      name = '.trunk.'
      preferred_parent_name = '.'
    else:
      name = stats.lod.name
      if symbol.preferred_parent_id is None:
        preferred_parent_name = '.'
      else:
        preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
        if isinstance(preferred_parent, Trunk):
          preferred_parent_name = '.trunk.'
        else:
          preferred_parent_name = preferred_parent.name

    if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
      symbol_path = symbol.base_path
    else:
      symbol_path = '.'

    self.symbol_info_file.write(
        '%-5d %-30s %-10s %s %s\n' % (
            stats.lod.project.id,
            name,
            self.conversion_names[symbol.__class__],
            symbol_path,
            preferred_parent_name,
            )
        )
    self.symbol_info_file.write(' # %s\n' % (stats,))
    parent_counts = stats.possible_parents.items()
    if parent_counts:
      self.symbol_info_file.write(' # Possible parents:\n')
      # Sort by decreasing count, then by increasing parent:
      parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
      for (pp, count) in parent_counts:
        if isinstance(pp, Trunk):
          self.symbol_info_file.write(
              ' # .trunk. : %d\n' % (count,)
              )
        else:
          self.symbol_info_file.write(
              ' # %s : %d\n' % (pp.name, count,)
              )

  def get_symbols(self, run_options):
    """Return a map telling how to convert symbols.

    The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
    indicating how each symbol should be converted.  Trunk objects in
    SYMBOL_STATS are passed through unchanged.  One object is included
    in the return value for each line of development described in
    SYMBOL_STATS.

    The symbol info file (if one was requested) is closed and
    self.symbol_info_file is deleted before this method exits, whether
    it returns normally or raises.

    Raise FatalError if there was an error."""

    errors = []
    mismatches = []

    if Ctx().symbol_info_filename is not None:
      self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
    else:
      self.symbol_info_file = None

    try:
      if self.symbol_info_file:
        self.symbol_info_file.write(
            '# Columns: project_id symbol_name conversion symbol_path '
            'preferred_parent_name\n'
            )

      # Initialize each symbol strategy rule a single time, even if it
      # is used in more than one project.  First define a map from
      # object id to symbol strategy rule:
      rules = {}
      for rule_list in run_options.project_symbol_strategy_rules:
        for rule in rule_list:
          rules[id(rule)] = rule

      for rule in rules.itervalues():
        rule.start(self.symbol_stats)

      retval = {}

      for stats in self.symbol_stats:
        try:
          symbol = self.get_symbol(run_options, stats)
        except IndeterminateSymbolException:
          # sys.exc_info() is used instead of "except ..., e" so that
          # the module parses under both old and new Python versions.
          # It must be read before anything else is called:
          e = sys.exc_info()[1]
          self.log_symbol_summary(stats, stats.lod)
          mismatches.append(e.stats)
        except SymbolPlanError:
          e = sys.exc_info()[1]
          self.log_symbol_summary(stats, stats.lod)
          errors.append(e)
        else:
          self.log_symbol_summary(stats, symbol)
          retval[stats.lod] = symbol

      for rule in rules.itervalues():
        rule.finish()
    finally:
      # Don't leak the file handle if a rule or a statistics call
      # raised an unexpected exception:
      if self.symbol_info_file:
        self.symbol_info_file.close()

      del self.symbol_info_file

    if errors or mismatches:
      s = ['Problems determining how symbols should be converted:\n']
      for e in errors:
        s.append('%s\n' % (e,))
      if mismatches:
        s.append(
            'It is not clear how the following symbols '
            'should be converted.\n'
            'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
            'and/or\n'
            '--symbol-default to resolve the ambiguity.\n'
            )
        for stats in mismatches:
          s.append(' %s\n' % (stats,))
      raise FatalError(''.join(s))
    else:
      return retval

  def run(self, run_options, stats_keeper):
    """Decide how each symbol is converted and create the symbol database."""

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    self.symbol_stats = SymbolStatistics(
        artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
        )

    symbol_map = self.get_symbols(run_options)

    # Check the symbols for consistency and bail out if there were errors:
    self.symbol_stats.check_consistency(symbol_map)

    # Check that the symbols all have SVN paths set and that the paths
    # are disjoint:
    Ctx().output_option.check_symbols(symbol_map)

    for symbol in symbol_map.itervalues():
      if isinstance(symbol, ExcludedSymbol):
        self.symbol_stats.exclude_symbol(symbol)

    create_symbol_database(symbol_map.values())

    del self.symbol_stats

    Log().quiet("Done")
class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    """Register the files that this pass creates and the ones it needs.

    The configured revision collector also gets to register its own
    artifacts against this pass."""

    self._register_temp_file(config.ITEM_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    Ctx().revision_collector.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    """Filter the CVS items one file at a time and write what remains.

    CVSRevisions and CVSSymbols are written to separate sortable
    databases, and the serializer that they share is pickled to the
    ITEM_SERIALIZER file."""

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))

    cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
    f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'wb')
    try:
      cPickle.dump(cvs_item_serializer, f, -1)
    finally:
      # Close the handle even if pickling fails:
      f.close()

    rev_db = NewSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
        cvs_item_serializer,
        )

    symbol_db = NewSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
        cvs_item_serializer,
        )

    revision_collector = Ctx().revision_collector

    Log().quiet("Filtering out excluded symbols and summarizing items...")

    stats_keeper.reset_cvs_rev_info()
    revision_collector.start()

    # Process the cvs items store one file at a time:
    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      Log().verbose(cvs_file_items.cvs_file.filename)
      cvs_file_items.filter_excluded_symbols()
      cvs_file_items.mutate_symbols()
      cvs_file_items.adjust_parents()
      cvs_file_items.refine_symbols()
      cvs_file_items.record_opened_symbols()
      cvs_file_items.record_closed_symbols()
      cvs_file_items.check_link_consistency()

      # Give the revision collector a chance to collect data about the
      # file:
      revision_collector.process_file(cvs_file_items)

      # Store whatever is left to the new file and update statistics:
      stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
      for cvs_item in cvs_file_items.values():
        stats_keeper.record_cvs_item(cvs_item)

        if isinstance(cvs_item, CVSRevision):
          rev_db.add(cvs_item)
        elif isinstance(cvs_item, CVSSymbol):
          symbol_db.add(cvs_item)

    stats_keeper.set_stats_reflect_exclude(True)

    rev_db.close()
    symbol_db.close()
    revision_collector.finish()
    cvs_item_store.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class SortRevisionsPass(Pass):
  """Sort the revisions file."""

  def register_artifacts(self):
    """Register the sorted output file and the unsorted input it needs."""

    self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_DATAFILE)

  def run(self, run_options, stats_keeper):
    """Sort CVS_REVS_DATAFILE into CVS_REVS_SORTED_DATAFILE."""

    Log().quiet("Sorting CVS revision summaries...")

    unsorted_path = artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE)
    sorted_path = artifact_manager.get_temp_file(
        config.CVS_REVS_SORTED_DATAFILE
        )
    # Scratch files for the sort go in the configured temporary directory:
    sort_file(unsorted_path, sorted_path, tempdirs=[Ctx().tmpdir])

    Log().quiet("Done")
class SortSymbolsPass(Pass):
  """Sort the symbols file."""

  def register_artifacts(self):
    """Register the sorted output file and the unsorted input it needs."""

    self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)

  def run(self, run_options, stats_keeper):
    """Sort CVS_SYMBOLS_DATAFILE into CVS_SYMBOLS_SORTED_DATAFILE."""

    Log().quiet("Sorting CVS symbol summaries...")

    unsorted_path = artifact_manager.get_temp_file(
        config.CVS_SYMBOLS_DATAFILE
        )
    sorted_path = artifact_manager.get_temp_file(
        config.CVS_SYMBOLS_SORTED_DATAFILE
        )
    # Scratch files for the sort go in the configured temporary directory:
    sort_file(unsorted_path, sorted_path, tempdirs=[Ctx().tmpdir])

    Log().quiet("Done")
class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    """Register the files that this pass creates and the ones it needs."""

    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
    self._register_temp_file(config.CHANGESETS_STORE)
    self._register_temp_file(config.CHANGESETS_INDEX)
    self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.ITEM_SERIALIZER)
    self._register_temp_file_needed(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(
        config.CVS_SYMBOLS_SORTED_DATAFILE)

  def get_revision_changesets(self):
    """Generate revision changesets, one at a time.

    Each time, yield a list of CVSRevisions that might potentially
    constitute a changeset.  A new list is started whenever the
    metadata id changes or the timestamp is more than
    config.COMMIT_THRESHOLD later than that of the previous revision."""

    # Create changesets for CVSRevisions:
    old_metadata_id = None
    old_timestamp = None
    changeset_items = []

    db = OldSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(
            config.CVS_REVS_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_rev in db:
      # (For the first revision the metadata ids differ, so the
      # timestamp test, which could not cope with old_timestamp being
      # None, is short-circuited.)
      if cvs_rev.metadata_id != old_metadata_id \
         or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_metadata_id = cvs_rev.metadata_id
      changeset_items.append(cvs_rev)
      old_timestamp = cvs_rev.timestamp

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  def get_symbol_changesets(self):
    """Generate symbol changesets, one at a time.

    Each time, yield a list of CVSSymbols that might potentially
    constitute a changeset.  Consecutive CVSSymbols that share a
    symbol id are grouped together."""

    old_symbol_id = None
    changeset_items = []

    db = OldSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_symbol in db:
      if cvs_symbol.symbol.id != old_symbol_id:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_symbol_id = cvs_symbol.symbol.id
      changeset_items.append(cvs_symbol)

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  @staticmethod
  def compare_items(a, b):
    """Compare CVSRevisions A and B, cmp()-style.

    Order by timestamp, then by CVS path, then by revision number
    (compared component by component as integers), then by id."""

    return (
        cmp(a.timestamp, b.timestamp)
        or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
        or cmp([int(x) for x in a.rev.split('.')],
               [int(x) for x in b.rev.split('.')])
        or cmp(a.id, b.id))

  def break_internal_dependencies(self, changeset_items):
    """Split up CHANGESET_ITEMS if necessary to break internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could possibly
    belong in a single RevisionChangeset, but there might be internal
    dependencies among the items.  Return a list of lists, where each
    sublist is a list of CVSRevisions and at least one internal
    dependency has been eliminated.  Iff CHANGESET_ITEMS does not have
    to be split, then the return value will contain a single value,
    namely the original value of CHANGESET_ITEMS.  Split
    CHANGESET_ITEMS at most once, even though the resulting changesets
    might themselves have internal dependencies.

    If a split is needed, CHANGESET_ITEMS is sorted in place first.

    Raise InternalError if an item depends on itself."""

    # We only look for succ dependencies, since by doing so we
    # automatically cover pred dependencies as well.  First create a
    # list of tuples (pred, succ) of id pairs for CVSItems that depend
    # on each other.
    dependencies = []
    changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
    for cvs_item in changeset_items:
      for next_id in cvs_item.get_succ_ids():
        if next_id in changeset_cvs_item_ids:
          # Sanity check: a CVSItem should never depend on itself:
          if next_id == cvs_item.id:
            raise InternalError('Item depends on itself: %s' % (cvs_item,))

          dependencies.append((cvs_item.id, next_id,))

    if not dependencies:
      return [changeset_items]

    # Sort the changeset_items in a defined order (chronological to the
    # extent that the timestamps are correct and unique).
    changeset_items.sort(self.compare_items)
    indexes = {}
    for (i, changeset_item) in enumerate(changeset_items):
      indexes[changeset_item.id] = i

    # How many internal dependencies would be broken by breaking the
    # Changeset after a particular index?  First record +1 where each
    # dependency starts and -1 where it ends; the running sum computed
    # below then gives the number of dependencies spanning each gap.
    breaks = [0] * len(changeset_items)
    for (pred, succ,) in dependencies:
      pred_index = indexes[pred]
      succ_index = indexes[succ]
      breaks[min(pred_index, succ_index)] += 1
      breaks[max(pred_index, succ_index)] -= 1
    for i in range(1, len(breaks)):
      breaks[i] += breaks[i - 1]

    # Choose the gap that breaks the most dependencies.  Among equally
    # good gaps, prefer the one with the smallest timestamp
    # difference; if that is also equal, keep the earliest gap.
    best_i = None
    best_count = -1
    best_time = 0
    for i in range(0, len(breaks) - 1):
      gap = changeset_items[i + 1].timestamp - changeset_items[i].timestamp
      if breaks[i] > best_count \
         or (breaks[i] == best_count and gap < best_time):
        best_i = i
        best_count = breaks[i]
        best_time = gap

    # Split the sorted items just after the chosen index:
    return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]

  def break_all_internal_dependencies(self, changeset_items):
    """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
    be part of a single changeset.  Break this list into sublists,
    where the CVSRevisions in each sublist are free of mutual
    dependencies.  Yield the sublists one at a time."""

    # This method is written non-recursively to avoid any possible
    # problems with recursion depth.

    changesets_to_split = [changeset_items]
    while changesets_to_split:
      changesets = self.break_internal_dependencies(changesets_to_split.pop())
      if len(changesets) == 1:
        [changeset_items] = changesets
        yield changeset_items
      else:
        # The changeset had to be split; see if either of the
        # fragments have to be split.  They are pushed in reverse so
        # that the first fragment is the next one popped:
        changesets.reverse()
        changesets_to_split.extend(changesets)

  def get_changesets(self):
    """Generate (Changeset, [CVSItem,...]) for all changesets.

    The Changesets already have their internal dependencies broken.
    The [CVSItem,...] list is the list of CVSItems in the
    corresponding Changeset.  All revision changesets are generated
    before any symbol changesets."""

    for changeset_items in self.get_revision_changesets():
      for split_changeset_items \
              in self.break_all_internal_dependencies(changeset_items):
        yield (
            RevisionChangeset(
                self.changeset_key_generator.gen_id(),
                [cvs_rev.id for cvs_rev in split_changeset_items]
                ),
            split_changeset_items,
            )

    for changeset_items in self.get_symbol_changesets():
      yield (
          create_symbol_changeset(
              self.changeset_key_generator.gen_id(),
              changeset_items[0].symbol,
              [cvs_symbol.id for cvs_symbol in changeset_items]
              ),
          changeset_items,
          )

  def run(self, run_options, stats_keeper):
    """Store the preliminary changesets and the items they contain."""

    Log().quiet("Creating preliminary commit sets...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    # ITEM_SERIALIZER is a registered temporary file of this
    # conversion, not external input.
    f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'rb')
    try:
      self.cvs_item_serializer = cPickle.load(f)
    finally:
      # Close the handle even if unpickling fails:
      f.close()

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_NEW,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        DB_OPEN_NEW,
        )

    self.sorted_cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_NEW)

    self.changeset_key_generator = KeyGenerator()

    for (changeset, changeset_items) in self.get_changesets():
      if Log().is_on(Log.DEBUG):
        Log().debug(repr(changeset))
      changeset_db.store(changeset)
      for cvs_item in changeset_items:
        self.sorted_cvs_items_db.add(cvs_item)
        cvs_item_to_changeset_id[cvs_item.id] = changeset.id

    self.sorted_cvs_items_db.close()
    cvs_item_to_changeset_id.close()
    changeset_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    del self.cvs_item_serializer

    Log().quiet("Done")
class ProcessedChangesetLogger:
  """Collect the ids of processed changesets and log them in batches.

  Ids are only collected while DEBUG logging is on.  flush() writes
  the collected ids as a single debug message and empties the list."""

  def __init__(self):
    # The ids logged since the last flush:
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    """Remember CHANGESET_ID if DEBUG logging is on."""

    if Log().is_on(Log.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    """Write any remembered ids (in hex) to the debug log and forget them."""

    if self.processed_changeset_ids:
      hex_ids = [
          '%x' % changeset_id
          for changeset_id in self.processed_changeset_ids
          ]
      Log().debug(
          'Consumed changeset ids %s'
          % (', '.join(hex_ids),))

      # Empty the list in place:
      del self.processed_changeset_ids[:]
class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    """Register the files that this pass creates and the ones it needs."""

    for created in (
          config.CHANGESETS_REVBROKEN_STORE,
          config.CHANGESETS_REVBROKEN_INDEX,
          config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
          ):
      self._register_temp_file(created)

    for needed in (
          config.PROJECTS,
          config.SYMBOL_DB,
          config.CVS_PATHS_DB,
          config.CVS_ITEMS_SORTED_STORE,
          config.CVS_ITEMS_SORTED_INDEX_TABLE,
          config.CHANGESETS_STORE,
          config.CHANGESETS_INDEX,
          config.CVS_ITEM_TO_CHANGESET,
          ):
      self._register_temp_file_needed(needed)

  def get_source_changesets(self):
    """Generate the changesets stored by the previous pass.

    The source database is closed once all changesets have been
    generated."""

    source_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_READ)

    for changeset_id in source_db.keys():
      yield source_db[changeset_id]

    source_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Find the smallest link, as ordered by ChangesetGraphLink
    # comparison; the first one wins ties:
    chosen = None
    cycle_length = len(cycle)
    for i in range(cycle_length):
      # It's OK if the index of the predecessor wraps to -1:
      candidate = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - cycle_length])

      if chosen is None or candidate < chosen:
        chosen = candidate

    if Log().is_on(Log.DEBUG):
      node_ids = ['%x' % node.id for node in (cycle + [cycle[0]])]
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
              ' -> '.join(node_ids),
              chosen.changeset.id,))

    new_changesets = chosen.break_changeset(self.changeset_key_generator)

    # Replace the broken changeset in the graph with its fragments:
    self.changeset_graph.delete_changeset(chosen.changeset)

    for fragment in new_changesets:
      self.changeset_graph.add_new_changeset(fragment)

  def run(self, run_options, stats_keeper):
    """Copy all changesets to the REVBROKEN database, breaking cycles.

    Only RevisionChangesets are added to the graph that is consumed."""

    Log().quiet("Breaking revision changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Work on a copy of the item->changeset table:
    shutil.copyfile(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN
            )
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN
            ),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    highest_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, RevisionChangeset):
        self.changeset_graph.add_changeset(changeset)
      highest_id = max(highest_id, changeset.id)

    # New changesets get ids above every id that was just stored:
    self.changeset_key_generator = KeyGenerator(highest_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    consumed = self.changeset_graph.consume_graph(
        cycle_breaker=self.break_cycle
        )
    for (changeset, time_range) in consumed:
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
950 class RevisionTopologicalSortPass(Pass):
951 """Sort RevisionChangesets into commit order.
953 Also convert them to OrderedChangesets, without changing their ids."""
def register_artifacts(self):
  """Register the files that this pass creates and the ones it needs."""

  for created in (
        config.CHANGESETS_REVSORTED_STORE,
        config.CHANGESETS_REVSORTED_INDEX,
        ):
    self._register_temp_file(created)

  for needed in (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_REVBROKEN_STORE,
        config.CHANGESETS_REVBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
        ):
    self._register_temp_file_needed(needed)
967 def get_source_changesets(self, changeset_db):
968 changeset_ids = changeset_db.keys()
970 for changeset_id in changeset_ids:
971 yield changeset_db[changeset_id]
973 def get_changesets(self):
974 changeset_db = ChangesetDatabase(
975 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
976 artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
977 DB_OPEN_READ,
980 changeset_graph = ChangesetGraph(
981 changeset_db,
982 CVSItemToChangesetTable(
983 artifact_manager.get_temp_file(
984 config.CVS_ITEM_TO_CHANGESET_REVBROKEN
986 DB_OPEN_READ,
990 for changeset in self.get_source_changesets(changeset_db):
991 if isinstance(changeset, RevisionChangeset):
992 changeset_graph.add_changeset(changeset)
993 else:
994 yield changeset
996 changeset_ids = []
998 # Sentry:
999 changeset_ids.append(None)
1001 for (changeset, time_range) in changeset_graph.consume_graph():
1002 changeset_ids.append(changeset.id)
1004 # Sentry:
1005 changeset_ids.append(None)
1007 for i in range(1, len(changeset_ids) - 1):
1008 changeset = changeset_db[changeset_ids[i]]
1009 yield OrderedChangeset(
1010 changeset.id, changeset.cvs_item_ids, i - 1,
1011 changeset_ids[i - 1], changeset_ids[i + 1])
1013 changeset_graph.close()
1015 def run(self, run_options, stats_keeper):
1016 Log().quiet("Generating CVSRevisions in commit order...")
1018 Ctx()._projects = read_projects(
1019 artifact_manager.get_temp_file(config.PROJECTS)
1021 Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
1022 Ctx()._symbol_db = SymbolDatabase()
1023 Ctx()._cvs_items_db = IndexedCVSItemStore(
1024 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
1025 artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
1026 DB_OPEN_READ)
1028 changesets_revordered_db = ChangesetDatabase(
1029 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
1030 artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
1031 DB_OPEN_NEW)
1033 for changeset in self.get_changesets():
1034 changesets_revordered_db.store(changeset)
1036 changesets_revordered_db.close()
1037 Ctx()._cvs_items_db.close()
1038 Ctx()._symbol_db.close()
1039 Ctx()._cvs_path_db.close()
1041 Log().quiet("Done")
class BreakSymbolChangesetCyclesPass(Pass):
  """Break the dependency cycles that consist only of SymbolChangesets."""

  def register_artifacts(self):
    # Files written by this pass:
    for artifact in (
          config.CHANGESETS_SYMBROKEN_STORE,
          config.CHANGESETS_SYMBROKEN_INDEX,
          config.CVS_ITEM_TO_CHANGESET_SYMBROKEN,
          ):
      self._register_temp_file(artifact)

    # Files registered as needed by this pass:
    for artifact in (
          config.PROJECTS,
          config.SYMBOL_DB,
          config.CVS_PATHS_DB,
          config.CVS_ITEMS_SORTED_STORE,
          config.CVS_ITEMS_SORTED_INDEX_TABLE,
          config.CHANGESETS_REVSORTED_STORE,
          config.CHANGESETS_REVSORTED_INDEX,
          config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
          ):
      self._register_temp_file_needed(artifact)

  def get_source_changesets(self):
    """Generate every changeset from the REVSORTED changeset database.

    The database is closed once all of the changesets have been
    generated."""

    source_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    for changeset_id in source_db.keys():
      yield source_db[changeset_id]

    source_db.close()

  def break_cycle(self, cycle):
    """Split a changeset in CYCLE to make progress on breaking it.

    CYCLE is a list of Changesets in which

        cycle[i] depends on cycle[i - 1]

    The link that compares lowest among all links of the cycle is
    chosen, its changeset is broken up, and self.changeset_graph is
    updated to hold the pieces instead of the original.

    A single call is not guaranteed to break the cycle, but it must
    at least make some progress."""

    self.processed_changeset_logger.flush()

    cycle_length = len(cycle)
    best_link = None
    for position in range(cycle_length):
      # Negative indexes are fine here; they make the neighbors wrap
      # around the ends of the list:
      candidate = ChangesetGraphLink(
          cycle[position - 1],
          cycle[position],
          cycle[position + 1 - cycle_length])

      if best_link is None or candidate < best_link:
        best_link = candidate

    if Log().is_on(Log.DEBUG):
      closed_cycle = cycle + [cycle[0]]
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
              ' -> '.join(['%x' % node.id for node in closed_cycle]),
              best_link.changeset.id,))

    replacements = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for replacement in replacements:
      self.changeset_graph.add_new_changeset(replacement)

  def run(self, run_options, stats_keeper):
    """Copy the changesets to the SYMBROKEN db, breaking symbol cycles."""

    Log().quiet("Breaking symbol changeset dependency cycles...")

    projects_file = artifact_manager.get_temp_file(config.PROJECTS)
    Ctx()._projects = read_projects(projects_file)
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Work on a copy of the item->changeset table so that the table
    # left by the previous pass is not modified:
    revbroken_table = artifact_manager.get_temp_file(
        config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
    symbroken_table = artifact_manager.get_temp_file(
        config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    shutil.copyfile(revbroken_table, symbroken_table)
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        symbroken_table, DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    # Every changeset is stored, but only the SymbolChangesets take
    # part in the graph.  Remember the largest id seen so that newly
    # generated ids start above it:
    highest_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      highest_id = max(highest_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(highest_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph; self.break_cycle() is used to break cycles:
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Generate every changeset from the SYMBROKEN changeset database.

    The database is closed once all of the changesets have been
    generated."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets.

    CHANGESET is removed from self.changeset_graph and replaced by an
    'early' and a 'late' changeset.  The late changeset is checked
    again and split further if it is still retrograde."""

    Log().debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      # Predecessors whose changeset has no ordinal count as 0:
      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      # Successors whose changeset has no ordinal count as sys.maxint:
      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Items whose predecessors all come before the earliest successor
    # go into the early changeset; the rest go into the late one:
    early_item_ids = []
    late_item_ids = []
    for (cvs_item_id, (max_pred_ordinal, min_succ_ordinal)) \
            in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(cvs_item_id)
      else:
        early_item_ids.append(cvs_item_id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split the changeset with id CHANGESET_ID if it is retrograde.

    The changeset is treated as retrograde if the largest ordinal
    among its predecessors is greater than or equal to the smallest
    ordinal among its successors.  Neighbors that have no entry in
    self.ordinals are ignored.

    Return True iff the changeset was split."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[pred_id]
        for pred_id in node.pred_ids
        if pred_id in self.ordinals
        ]
    succ_ordinals = [
        self.ordinals[succ_id]
        for succ_id in node.succ_ids
        if succ_id in self.ordinals
        ]
    if pred_ordinals and succ_ordinals \
           and max(pred_ordinals) >= min(succ_ordinals):
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets.

    The link that compares lowest among the interior links is chosen,
    its changeset is broken up, and self.changeset_graph is updated
    to hold the pieces instead of the original."""

    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_link is None or link < best_link:
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking segment %s by breaking node %x' % (
              ' -> '.join(['%x' % node.id for node in segment]),
              best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s' % (
              ' -> '.join(['%x' % changeset.id
                           for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    """Copy the changesets to the ALLBROKEN db, breaking all cycles."""

    Log().quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Work on a copy of the item->changeset table so that the table
    # left by the previous pass is not modified:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      ordered_changesets.append(ordered_changeset_map[ordinal])

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    del self.ordinals

    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      changeset_id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(
          changeset_id, ordered_changeset_ids)
      if path:
        if Log().is_on(Log.DEBUG):
          Log().debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if Log().is_on(Log.DEBUG):
          Log().debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[changeset_id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(changeset_id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    # Close the context databases that were opened at the top of this
    # method, as the other passes in this module do at the end of
    # their run() methods:
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            ),
        )

    # The ids of the SymbolChangesets; the timestamper is told whether
    # each changeset that it stamps is one of them:
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    """Write one '<id> <timestamp>' line (both hex) per changeset."""

    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    # Make sure that the output file is closed even if sorting the
    # changesets fails part of the way through:
    try:
      for (changeset, timestamp) in self.get_changesets():
        sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))
    finally:
      sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order.

    The order is the order of the lines in CHANGESETS_SORTED_DATAFILE.
    Each line holds a changeset id and a timestamp, both written in
    hexadecimal and separated by whitespace."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    # Keep a reference to the file so that it can be closed explicitly
    # instead of being left for the garbage collector:
    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'r')

    for line in sorted_changesets:
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    sorted_changesets.close()
    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order.

    CREATOR.process_changeset() is called for each changeset from
    get_changesets(), and whatever it produces is passed on."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    Log().normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    # Only SVNRevisionCommits have their CVSRevisions listed:
    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    """Create the SVNCommits and store them via a PersistenceManager."""

    Log().quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    # Record the number of the last revision that was generated:
    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
    del creator

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class SortSymbolOpeningsClosingsPass(Pass):
  """Sort the file of symbol openings and closings.

  This pass was formerly known as pass6."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting symbolic name source revisions...")

    def sort_key(line):
      # Sort by the first field (parsed as a hexadecimal integer),
      # then by the second field (parsed as a decimal integer), then
      # by the remainder of the line:
      fields = line.split(' ', 2)
      first = int(fields[0], 16)
      second = int(fields[1])
      return (first, second, fields[2],)

    unsorted_filename = artifact_manager.get_temp_file(
        config.SYMBOL_OPENINGS_CLOSINGS
        )
    sorted_filename = artifact_manager.get_temp_file(
        config.SYMBOL_OPENINGS_CLOSINGS_SORTED
        )
    sort_file(
        unsorted_filename,
        sorted_filename,
        key=sort_key,
        tempdirs=[Ctx().tmpdir],
        )

    Log().quiet("Done")
class IndexSymbolsPass(Pass):
  """Record where each symbol starts in the sorted openings/closings file.

  This pass was formerly known as pass7."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need.

    The mapping is keyed by symbol id and is written to
    SYMBOL_OFFSETS_DB as a pickle."""

    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    # Make sure that the file is closed even if a line cannot be
    # parsed or a symbol cannot be looked up:
    try:
      old_id = None
      while True:
        # The offset has to be read before the line is, so that it
        # points to the start of the line:
        fpos = f.tell()
        line = f.readline()
        if not line:
          break
        symbol_id, svn_revnum, ignored = line.split(" ", 2)
        symbol_id = int(symbol_id, 16)
        if symbol_id != old_id:
          Log().verbose(' ', Ctx()._symbol_db.get_symbol(symbol_id).name)
          old_id = symbol_id
          offsets[symbol_id] = fpos
    finally:
      f.close()

    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    try:
      cPickle.dump(offsets, offsets_db, -1)
    finally:
      offsets_db.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    Log().quiet("Done.")
class OutputPass(Pass):
  """Send the stored SVNCommits to the configured output option.

  This pass was formerly known as pass8."""

  def register_artifacts(self):
    for artifact in (
          config.PROJECTS,
          config.CVS_PATHS_DB,
          config.CVS_ITEMS_SORTED_STORE,
          config.CVS_ITEMS_SORTED_INDEX_TABLE,
          config.SYMBOL_DB,
          config.METADATA_CLEAN_INDEX_TABLE,
          config.METADATA_CLEAN_STORE,
          config.SVN_COMMITS_INDEX_TABLE,
          config.SVN_COMMITS_STORE,
          config.CVS_REVS_TO_SVN_REVNUMS,
          ):
      self._register_temp_file_needed(artifact)

    # The output option registers whatever it needs itself:
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    """Output the SVNCommits in order of revision number, from 1."""

    projects_file = artifact_manager.get_temp_file(config.PROJECTS)
    Ctx()._projects = read_projects(projects_file)
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # Fetch the commits one revision number at a time and stop at the
    # first lookup that returns a false value:
    svn_revnum = 1
    while True:
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
      if not svn_commit:
        break
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_path_db.close()
1755 # The list of passes constituting a run of cvs2svn:
1756 passes = [
1757 CollectRevsPass(),
1758 CleanMetadataPass(),
1759 CollateSymbolsPass(),
1760 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1761 FilterSymbolsPass(),
1762 SortRevisionsPass(),
1763 SortSymbolsPass(),
1764 InitializeChangesetsPass(),
1765 #CheckIndexedItemStoreDependenciesPass(
1766 # config.CVS_ITEMS_SORTED_STORE,
1767 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1768 BreakRevisionChangesetCyclesPass(),
1769 RevisionTopologicalSortPass(),
1770 BreakSymbolChangesetCyclesPass(),
1771 BreakAllChangesetCyclesPass(),
1772 TopologicalSortPass(),
1773 CreateRevsPass(),
1774 SortSymbolOpeningsClosingsPass(),
1775 IndexSymbolsPass(),
1776 OutputPass(),