Split up too-long line.
[cvs2svn.git] / cvs2svn_lib / passes.py
blobb143b5eb826f54b115e63ff4e09ff93757b55190
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
20 import sys
21 import shutil
22 import cPickle
24 from cvs2svn_lib import config
25 from cvs2svn_lib.context import Ctx
26 from cvs2svn_lib.common import warning_prefix
27 from cvs2svn_lib.common import FatalException
28 from cvs2svn_lib.common import FatalError
29 from cvs2svn_lib.common import InternalError
30 from cvs2svn_lib.common import DB_OPEN_NEW
31 from cvs2svn_lib.common import DB_OPEN_READ
32 from cvs2svn_lib.common import DB_OPEN_WRITE
33 from cvs2svn_lib.common import Timestamper
34 from cvs2svn_lib.sort import sort_file
35 from cvs2svn_lib.log import Log
36 from cvs2svn_lib.pass_manager import Pass
37 from cvs2svn_lib.serializer import PrimedPickleSerializer
38 from cvs2svn_lib.artifact_manager import artifact_manager
39 from cvs2svn_lib.cvs_path_database import CVSPathDatabase
40 from cvs2svn_lib.metadata_database import MetadataDatabase
41 from cvs2svn_lib.project import read_projects
42 from cvs2svn_lib.project import write_projects
43 from cvs2svn_lib.symbol import LineOfDevelopment
44 from cvs2svn_lib.symbol import Trunk
45 from cvs2svn_lib.symbol import Symbol
46 from cvs2svn_lib.symbol import Branch
47 from cvs2svn_lib.symbol import Tag
48 from cvs2svn_lib.symbol import ExcludedSymbol
49 from cvs2svn_lib.symbol_database import SymbolDatabase
50 from cvs2svn_lib.symbol_database import create_symbol_database
51 from cvs2svn_lib.symbol_statistics import SymbolPlanError
52 from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
53 from cvs2svn_lib.symbol_statistics import SymbolStatistics
54 from cvs2svn_lib.cvs_item import CVSRevision
55 from cvs2svn_lib.cvs_item import CVSSymbol
56 from cvs2svn_lib.cvs_item_database import OldCVSItemStore
57 from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
58 from cvs2svn_lib.cvs_item_database import cvs_item_primer
59 from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib.key_generator import KeyGenerator
64 from cvs2svn_lib.changeset import RevisionChangeset
65 from cvs2svn_lib.changeset import OrderedChangeset
66 from cvs2svn_lib.changeset import SymbolChangeset
67 from cvs2svn_lib.changeset import BranchChangeset
68 from cvs2svn_lib.changeset import create_symbol_changeset
69 from cvs2svn_lib.changeset_graph import ChangesetGraph
70 from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
71 from cvs2svn_lib.changeset_database import ChangesetDatabase
72 from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
73 from cvs2svn_lib.svn_commit import SVNRevisionCommit
74 from cvs2svn_lib.openings_closings import SymbolingsLogger
75 from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
76 from cvs2svn_lib.persistence_manager import PersistenceManager
77 from cvs2svn_lib.collect_data import CollectData
78 from cvs2svn_lib.check_dependencies_pass \
79 import CheckItemStoreDependenciesPass
80 from cvs2svn_lib.check_dependencies_pass \
81 import CheckIndexedItemStoreDependenciesPass
class CollectRevsPass(Pass):
  """This pass was formerly known as pass1."""

  def register_artifacts(self):
    # All six artifacts are outputs of this pass:
    for temp_file in [
        config.PROJECTS,
        config.SYMBOL_STATISTICS,
        config.METADATA_INDEX_TABLE,
        config.METADATA_STORE,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_STORE,
        ]:
      self._register_temp_file(temp_file)

  def run(self, run_options, stats_keeper):
    """Scan every CVS ',v' file in the configured projects.

    Populate the CVS path database and write the project list; abort
    with FatalException if the collector reported any fatal errors."""

    Log().quiet("Examining all CVS ',v' files...")
    Ctx()._projects = {}
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_NEW)
    collect_data = CollectData(stats_keeper)
    for project in run_options.projects:
      collect_data.process_project(project)
    # Release the project list; it is not needed beyond this point:
    run_options.projects = None

    fatal_errors = collect_data.close()

    if fatal_errors:
      raise FatalException("Pass 1 complete.\n"
                           + "=" * 75 + "\n"
                           + "Error summary:\n"
                           + "\n".join(fatal_errors) + "\n"
                           + "Exited due to fatal error(s).")

    Ctx()._cvs_path_db.close()
    write_projects(artifact_manager.get_temp_file(config.PROJECTS))
    Log().quiet("Done")
118 class CleanMetadataPass(Pass):
119 """Clean up CVS revision metadata and write it to a new database."""
121 def register_artifacts(self):
122 self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
123 self._register_temp_file(config.METADATA_CLEAN_STORE)
124 self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
125 self._register_temp_file_needed(config.METADATA_STORE)
127 def _get_clean_author(self, author):
128 """Return AUTHOR, converted appropriately to UTF8.
130 Raise a UnicodeException if it cannot be converted using the
131 configured cvs_author_decoder."""
133 try:
134 return self._authors[author]
135 except KeyError:
136 pass
138 try:
139 clean_author = Ctx().cvs_author_decoder(author)
140 except UnicodeError:
141 self._authors[author] = author
142 raise UnicodeError('Problem decoding author \'%s\'' % (author,))
144 try:
145 clean_author = clean_author.encode('utf8')
146 except UnicodeError:
147 self._authors[author] = author
148 raise UnicodeError('Problem encoding author \'%s\'' % (author,))
150 self._authors[author] = clean_author
151 return clean_author
153 def _get_clean_log_msg(self, log_msg):
154 """Return LOG_MSG, converted appropriately to UTF8.
156 Raise a UnicodeException if it cannot be converted using the
157 configured cvs_log_decoder."""
159 try:
160 clean_log_msg = Ctx().cvs_log_decoder(log_msg)
161 except UnicodeError:
162 raise UnicodeError(
163 'Problem decoding log message:\n'
164 '%s\n'
165 '%s\n'
166 '%s'
167 % ('-' * 75, log_msg, '-' * 75,)
170 try:
171 return clean_log_msg.encode('utf8')
172 except UnicodeError:
173 raise UnicodeError(
174 'Problem encoding log message:\n'
175 '%s\n'
176 '%s\n'
177 '%s'
178 % ('-' * 75, log_msg, '-' * 75,)
181 def _clean_metadata(self, metadata):
182 """Clean up METADATA by overwriting its members as necessary."""
184 try:
185 metadata.author = self._get_clean_author(metadata.author)
186 except UnicodeError, e:
187 Log().warn('%s: %s' % (warning_prefix, e,))
188 self.warnings = True
190 try:
191 metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
192 except UnicodeError, e:
193 Log().warn('%s: %s' % (warning_prefix, e,))
194 self.warnings = True
196 def run(self, run_options, stats_keeper):
197 Log().quiet("Converting metadata to UTF8...")
198 metadata_db = MetadataDatabase(
199 artifact_manager.get_temp_file(config.METADATA_STORE),
200 artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
201 DB_OPEN_READ,
203 metadata_clean_db = MetadataDatabase(
204 artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
205 artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
206 DB_OPEN_NEW,
209 self.warnings = False
211 # A map {author : clean_author} for those known (to avoid
212 # repeating warnings):
213 self._authors = {}
215 for id in metadata_db.iterkeys():
216 metadata = metadata_db[id]
218 # Record the original author name because it might be needed for
219 # expanding CVS keywords:
220 metadata.original_author = metadata.author
222 self._clean_metadata(metadata)
224 metadata_clean_db[id] = metadata
226 if self.warnings:
227 raise FatalError(
228 'There were warnings converting author names and/or log messages\n'
229 'to Unicode (see messages above). Please restart this pass\n'
230 'with one or more \'--encoding\' parameters or with\n'
231 '\'--fallback-encoding\'.'
234 metadata_clean_db.close()
235 metadata_db.close()
236 Log().quiet("Done")
239 class CollateSymbolsPass(Pass):
240 """Divide symbols into branches, tags, and excludes."""
242 conversion_names = {
243 Trunk : 'trunk',
244 Branch : 'branch',
245 Tag : 'tag',
246 ExcludedSymbol : 'exclude',
247 Symbol : '.',
250 def register_artifacts(self):
251 self._register_temp_file(config.SYMBOL_DB)
252 self._register_temp_file_needed(config.PROJECTS)
253 self._register_temp_file_needed(config.SYMBOL_STATISTICS)
255 def get_symbol(self, run_options, stats):
256 """Use StrategyRules to decide what to do with a symbol.
258 STATS is an instance of symbol_statistics._Stats describing an
259 instance of Symbol or Trunk. To determine how the symbol is to be
260 converted, consult the StrategyRules in the project's
261 symbol_strategy_rules. Each rule is allowed a chance to change
262 the way the symbol will be converted. If the symbol is not a
263 Trunk or TypedSymbol after all rules have run, raise
264 IndeterminateSymbolException."""
266 symbol = stats.lod
267 rules = run_options.project_symbol_strategy_rules[symbol.project.id]
268 for rule in rules:
269 symbol = rule.get_symbol(symbol, stats)
270 assert symbol is not None
272 stats.check_valid(symbol)
274 return symbol
276 def log_symbol_summary(self, stats, symbol):
277 if not self.symbol_info_file:
278 return
280 if isinstance(symbol, Trunk):
281 name = '.trunk.'
282 preferred_parent_name = '.'
283 else:
284 name = stats.lod.name
285 if symbol.preferred_parent_id is None:
286 preferred_parent_name = '.'
287 else:
288 preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
289 if isinstance(preferred_parent, Trunk):
290 preferred_parent_name = '.trunk.'
291 else:
292 preferred_parent_name = preferred_parent.name
294 if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
295 symbol_path = symbol.base_path
296 else:
297 symbol_path = '.'
299 self.symbol_info_file.write(
300 '%-5d %-30s %-10s %s %s\n' % (
301 stats.lod.project.id,
302 name,
303 self.conversion_names[symbol.__class__],
304 symbol_path,
305 preferred_parent_name,
308 self.symbol_info_file.write(' # %s\n' % (stats,))
309 parent_counts = stats.possible_parents.items()
310 if parent_counts:
311 self.symbol_info_file.write(' # Possible parents:\n')
312 parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
313 for (pp, count) in parent_counts:
314 if isinstance(pp, Trunk):
315 self.symbol_info_file.write(
316 ' # .trunk. : %d\n' % (count,)
318 else:
319 self.symbol_info_file.write(
320 ' # %s : %d\n' % (pp.name, count,)
323 def get_symbols(self, run_options):
324 """Return a map telling how to convert symbols.
326 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
327 indicating how each symbol should be converted. Trunk objects in
328 SYMBOL_STATS are passed through unchanged. One object is included
329 in the return value for each line of development described in
330 SYMBOL_STATS.
332 Raise FatalError if there was an error."""
334 errors = []
335 mismatches = []
337 if Ctx().symbol_info_filename is not None:
338 self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
339 self.symbol_info_file.write(
340 '# Columns: project_id symbol_name conversion symbol_path '
341 'preferred_parent_name\n'
343 else:
344 self.symbol_info_file = None
346 # Initialize each symbol strategy rule a single time, even if it
347 # is used in more than one project. First define a map from
348 # object id to symbol strategy rule:
349 rules = {}
350 for rule_list in run_options.project_symbol_strategy_rules:
351 for rule in rule_list:
352 rules[id(rule)] = rule
354 for rule in rules.itervalues():
355 rule.start(self.symbol_stats)
357 retval = {}
359 for stats in self.symbol_stats:
360 try:
361 symbol = self.get_symbol(run_options, stats)
362 except IndeterminateSymbolException, e:
363 self.log_symbol_summary(stats, stats.lod)
364 mismatches.append(e.stats)
365 except SymbolPlanError, e:
366 self.log_symbol_summary(stats, stats.lod)
367 errors.append(e)
368 else:
369 self.log_symbol_summary(stats, symbol)
370 retval[stats.lod] = symbol
372 for rule in rules.itervalues():
373 rule.finish()
375 if self.symbol_info_file:
376 self.symbol_info_file.close()
378 del self.symbol_info_file
380 if errors or mismatches:
381 s = ['Problems determining how symbols should be converted:\n']
382 for e in errors:
383 s.append('%s\n' % (e,))
384 if mismatches:
385 s.append(
386 'It is not clear how the following symbols '
387 'should be converted.\n'
388 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
389 'and/or\n'
390 '--symbol-default to resolve the ambiguity.\n'
392 for stats in mismatches:
393 s.append(' %s\n' % (stats,))
394 raise FatalError(''.join(s))
395 else:
396 return retval
398 def run(self, run_options, stats_keeper):
399 Ctx()._projects = read_projects(
400 artifact_manager.get_temp_file(config.PROJECTS)
402 self.symbol_stats = SymbolStatistics(
403 artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
406 symbol_map = self.get_symbols(run_options)
408 # Check the symbols for consistency and bail out if there were errors:
409 self.symbol_stats.check_consistency(symbol_map)
411 # Check that the symbols all have SVN paths set and that the paths
412 # are disjoint:
413 Ctx().output_option.check_symbols(symbol_map)
415 for symbol in symbol_map.itervalues():
416 if isinstance(symbol, ExcludedSymbol):
417 self.symbol_stats.exclude_symbol(symbol)
419 create_symbol_database(symbol_map.values())
421 del self.symbol_stats
423 Log().quiet("Done")
class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    self._register_temp_file(config.ITEM_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    # The configured revision collector may need artifacts of its own:
    Ctx().revision_collector.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))

    # Persist the serializer so that later passes can deserialize the
    # data files written here:
    cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
    f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'wb')
    cPickle.dump(cvs_item_serializer, f, -1)
    f.close()

    rev_db = NewSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
        cvs_item_serializer,
        )

    symbol_db = NewSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
        cvs_item_serializer,
        )

    revision_collector = Ctx().revision_collector

    Log().quiet("Filtering out excluded symbols and summarizing items...")

    stats_keeper.reset_cvs_rev_info()
    revision_collector.start()

    # Process the cvs items store one file at a time:
    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      Log().verbose(cvs_file_items.cvs_file.filename)
      cvs_file_items.filter_excluded_symbols()
      cvs_file_items.mutate_symbols()
      cvs_file_items.adjust_parents()
      cvs_file_items.refine_symbols()
      cvs_file_items.determine_revision_properties(
          Ctx().revision_property_setters
          )
      cvs_file_items.record_opened_symbols()
      cvs_file_items.record_closed_symbols()
      cvs_file_items.check_link_consistency()

      # Give the revision collector a chance to collect data about the
      # file:
      revision_collector.process_file(cvs_file_items)

      # Store whatever is left to the new file and update statistics:
      stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
      for cvs_item in cvs_file_items.values():
        stats_keeper.record_cvs_item(cvs_item)

        if isinstance(cvs_item, CVSRevision):
          rev_db.add(cvs_item)
        elif isinstance(cvs_item, CVSSymbol):
          symbol_db.add(cvs_item)

    stats_keeper.set_stats_reflect_exclude(True)

    rev_db.close()
    symbol_db.close()
    revision_collector.finish()
    cvs_item_store.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class SortRevisionsPass(Pass):
  """Sort the revisions file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_DATAFILE)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting CVS revision summaries...")
    # Sort the on-disk revision summaries, scratching in the
    # conversion's temporary directory:
    unsorted = artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE)
    sorted = artifact_manager.get_temp_file(config.CVS_REVS_SORTED_DATAFILE)
    sort_file(unsorted, sorted, tempdirs=[Ctx().tmpdir])
    Log().quiet("Done")
class SortSymbolsPass(Pass):
  """Sort the symbols file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting CVS symbol summaries...")
    # Sort the on-disk symbol summaries, scratching in the
    # conversion's temporary directory:
    unsorted = artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE)
    sorted = artifact_manager.get_temp_file(
        config.CVS_SYMBOLS_SORTED_DATAFILE)
    sort_file(unsorted, sorted, tempdirs=[Ctx().tmpdir])
    Log().quiet("Done")
class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    for temp_file in [
        config.CVS_ITEM_TO_CHANGESET,
        config.CHANGESETS_STORE,
        config.CHANGESETS_INDEX,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        ]:
      self._register_temp_file(temp_file)
    for temp_file in [
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.ITEM_SERIALIZER,
        config.CVS_REVS_SORTED_DATAFILE,
        config.CVS_SYMBOLS_SORTED_DATAFILE,
        ]:
      self._register_temp_file_needed(temp_file)

  def get_revision_changesets(self):
    """Generate revision changesets, one at a time.

    Each time, yield a list of CVSRevisions that might potentially
    constitute a changeset."""

    # Create changesets for CVSRevisions:
    old_metadata_id = None
    old_timestamp = None
    changeset_items = []

    db = OldSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(
            config.CVS_REVS_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_rev in db:
      # A new group starts whenever the metadata (author/log) changes
      # or too much time has elapsed since the previous revision:
      if cvs_rev.metadata_id != old_metadata_id \
         or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_metadata_id = cvs_rev.metadata_id
      changeset_items.append(cvs_rev)
      old_timestamp = cvs_rev.timestamp

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  def get_symbol_changesets(self):
    """Generate symbol changesets, one at a time.

    Each time, yield a list of CVSSymbols that might potentially
    constitute a changeset."""

    old_symbol_id = None
    changeset_items = []

    db = OldSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_symbol in db:
      # Group consecutive records that refer to the same symbol:
      if cvs_symbol.symbol.id != old_symbol_id:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_symbol_id = cvs_symbol.symbol.id
      changeset_items.append(cvs_symbol)

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  @staticmethod
  def compare_items(a, b):
    # Chronological order, falling back to path, then numeric revision
    # number components, then id, to make the ordering total:
    return (
        cmp(a.timestamp, b.timestamp)
        or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
        or cmp([int(x) for x in a.rev.split('.')],
               [int(x) for x in b.rev.split('.')])
        or cmp(a.id, b.id))

  def break_internal_dependencies(self, changeset_items):
    """Split up CHANGESET_ITEMS if necessary to break internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could possibly
    belong in a single RevisionChangeset, but there might be internal
    dependencies among the items.  Return a list of lists, where each
    sublist is a list of CVSRevisions and at least one internal
    dependency has been eliminated.  Iff CHANGESET_ITEMS does not have
    to be split, then the return value will contain a single value,
    namely the original value of CHANGESET_ITEMS.  Split
    CHANGESET_ITEMS at most once, even though the resulting changesets
    might themselves have internal dependencies."""

    # We only look for succ dependencies, since by doing so we
    # automatically cover pred dependencies as well.  First create a
    # list of tuples (pred, succ) of id pairs for CVSItems that depend
    # on each other.
    dependencies = []
    changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
    for cvs_item in changeset_items:
      for next_id in cvs_item.get_succ_ids():
        if next_id in changeset_cvs_item_ids:
          # Sanity check: a CVSItem should never depend on itself:
          if next_id == cvs_item.id:
            raise InternalError('Item depends on itself: %s' % (cvs_item,))

          dependencies.append((cvs_item.id, next_id,))

    if not dependencies:
      return [changeset_items]

    # Sort the changeset_items in a defined order (chronological to the
    # extent that the timestamps are correct and unique).
    changeset_items.sort(self.compare_items)
    indexes = {}
    for (i, changeset_item) in enumerate(changeset_items):
      indexes[changeset_item.id] = i

    # How many internal dependencies would be broken by breaking the
    # Changeset after a particular index?
    breaks = [0] * len(changeset_items)
    for (pred, succ,) in dependencies:
      pred_index = indexes[pred]
      succ_index = indexes[succ]
      breaks[min(pred_index, succ_index)] += 1
      breaks[max(pred_index, succ_index)] -= 1

    # Choose the split point that breaks the most dependencies; among
    # equally good points, prefer the smallest time gap:
    best_i = None
    best_count = -1
    best_time = 0
    for i in range(1, len(breaks)):
      breaks[i] += breaks[i - 1]
    for i in range(0, len(breaks) - 1):
      if breaks[i] > best_count:
        best_i = i
        best_count = breaks[i]
        best_time = (changeset_items[i + 1].timestamp
                     - changeset_items[i].timestamp)
      elif breaks[i] == best_count \
           and (changeset_items[i + 1].timestamp
                - changeset_items[i].timestamp) < best_time:
        best_i = i
        best_count = breaks[i]
        best_time = (changeset_items[i + 1].timestamp
                     - changeset_items[i].timestamp)
    # Reuse the old changeset.id for the first of the split changesets.
    return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]

  def break_all_internal_dependencies(self, changeset_items):
    """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
    be part of a single changeset.  Break this list into sublists,
    where the CVSRevisions in each sublist are free of mutual
    dependencies."""

    # This method is written non-recursively to avoid any possible
    # problems with recursion depth.

    changesets_to_split = [changeset_items]
    while changesets_to_split:
      changesets = self.break_internal_dependencies(changesets_to_split.pop())
      if len(changesets) == 1:
        [changeset_items] = changesets
        yield changeset_items
      else:
        # The changeset had to be split; see if either of the
        # fragments have to be split:
        changesets.reverse()
        changesets_to_split.extend(changesets)

  def get_changesets(self):
    """Generate (Changeset, [CVSItem,...]) for all changesets.

    The Changesets already have their internal dependencies broken.
    The [CVSItem,...] list is the list of CVSItems in the
    corresponding Changeset."""

    for changeset_items in self.get_revision_changesets():
      for split_changeset_items \
              in self.break_all_internal_dependencies(changeset_items):
        yield (
            RevisionChangeset(
                self.changeset_key_generator.gen_id(),
                [cvs_rev.id for cvs_rev in split_changeset_items]
                ),
            split_changeset_items,
            )

    for changeset_items in self.get_symbol_changesets():
      yield (
          create_symbol_changeset(
              self.changeset_key_generator.gen_id(),
              changeset_items[0].symbol,
              [cvs_symbol.id for cvs_symbol in changeset_items]
              ),
          changeset_items,
          )

  def run(self, run_options, stats_keeper):
    Log().quiet("Creating preliminary commit sets...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    # Load the serializer written by FilterSymbolsPass:
    f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'rb')
    self.cvs_item_serializer = cPickle.load(f)
    f.close()

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_NEW,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        DB_OPEN_NEW,
        )

    self.sorted_cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_NEW)

    self.changeset_key_generator = KeyGenerator()

    for (changeset, changeset_items) in self.get_changesets():
      if Log().is_on(Log.DEBUG):
        Log().debug(repr(changeset))
      changeset_db.store(changeset)
      for cvs_item in changeset_items:
        self.sorted_cvs_items_db.add(cvs_item)
        cvs_item_to_changeset_id[cvs_item.id] = changeset.id

    self.sorted_cvs_items_db.close()
    cvs_item_to_changeset_id.close()
    changeset_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    del self.cvs_item_serializer

    Log().quiet("Done")
class ProcessedChangesetLogger:
  """Collect consumed changeset ids and emit them in batched DEBUG lines."""

  def __init__(self):
    # Changeset ids accumulated since the last flush():
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    # Only bother accumulating if DEBUG output is enabled:
    if Log().is_on(Log.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    if self.processed_changeset_ids:
      Log().debug(
          'Consumed changeset ids %s'
          % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))

      del self.processed_changeset_ids[:]
class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    for temp_file in [
        config.CHANGESETS_REVBROKEN_STORE,
        config.CHANGESETS_REVBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
        ]:
      self._register_temp_file(temp_file)
    for temp_file in [
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_STORE,
        config.CHANGESETS_INDEX,
        config.CVS_ITEM_TO_CHANGESET,
        ]:
      self._register_temp_file_needed(temp_file)

  def get_source_changesets(self):
    """Yield every changeset from the previous pass's database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()
    del old_changeset_db

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Find the best link to break, as defined by ChangesetGraphLink
    # ordering:
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking revision changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the REVBROKEN mapping with a copy of the old one, then open
    # it for in-place modification:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    # Copy all changesets through; only RevisionChangesets take part
    # in the cycle-breaking graph:
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, RevisionChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  The RevisionChangesets are re-emitted as OrderedChangesets (keeping
  their original ids); any other changesets pass through unchanged."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self, changeset_db):
    """Yield every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Yield changesets, converting RevisionChangesets to
    OrderedChangesets in commit order."""

    db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_READ,
        )

    graph = ChangesetGraph(
        db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN
                ),
            DB_OPEN_READ,
            )
        )

    # RevisionChangesets go into the graph to be sorted; anything else
    # is passed straight through:
    for changeset in self.get_source_changesets(db):
      if isinstance(changeset, RevisionChangeset):
        graph.add_changeset(changeset)
      else:
        yield changeset

    # Bracket the ordered ids with None sentries so that the first and
    # last OrderedChangesets see None for their missing neighbors:
    ordered_ids = [None]
    for (changeset, time_range) in graph.consume_graph():
      ordered_ids.append(changeset.id)
    ordered_ids.append(None)

    for i in range(1, len(ordered_ids) - 1):
      changeset = db[ordered_ids[i]]
      yield OrderedChangeset(
          changeset.id, changeset.cvs_item_ids, i - 1,
          ordered_ids[i - 1], ordered_ids[i + 1])

    graph.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    output_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW)

    for changeset in self.get_changesets():
      output_db.store(changeset)

    output_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Yield every changeset from the rev-sorted changeset database."""

    source_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    for changeset_id in source_db.keys():
      yield source_db[changeset_id]

    source_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Choose the cheapest link to break (first one wins ties):
    best_link = None
    for i in range(len(cycle)):
      # It's OK if these indexes wrap to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])
      if best_link is None or link < best_link:
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for new_changeset in new_changesets:
      self.changeset_graph.add_new_changeset(new_changeset)

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the SYMBROKEN item->changeset table with a copy of the
    # REVBROKEN one, then open the copy for update:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    item_to_changeset_table = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, item_to_changeset_table
        )

    # Store every changeset; only SymbolChangesets take part in the
    # cycle-breaking graph:
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Yield every changeset from the sym-broken changeset database."""

    source_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    for changeset_id in source_db.keys():
      yield source_db[changeset_id]

    source_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets."""

    Log().debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal over all items:
    min_min_succ_ordinal = min(
        [sys.maxint]
        + [limits[1] for limits in ordinal_limits.values()]
        )

    # Partition the items: anything whose predecessors reach past the
    # earliest successor must go in the "late" half:
    early_item_ids = []
    late_item_ids = []
    for (item_id, (max_pred_ordinal, min_succ_ordinal)) \
            in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(item_id)
      else:
        early_item_ids.append(item_id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split the changeset if it is retrograde; return whether it was."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[pred_id]
        for pred_id in node.pred_ids
        if pred_id in self.ordinals
        ]
    succ_ordinals = [
        self.ordinals[succ_id]
        for succ_id in node.succ_ids
        if succ_id in self.ordinals
        ]
    # "Retrograde" means some ordered predecessor comes at or after
    # some ordered successor:
    if pred_ordinals and succ_ordinals \
           and max(pred_ordinals) >= min(succ_ordinals):
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    # Choose the cheapest interior link to break (first one wins ties):
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])
      if best_link is None or link < best_link:
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for new_changeset in new_changesets:
      self.changeset_graph.add_new_changeset(new_changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    .  Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle.  Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the ALLBROKEN item->changeset table with a copy of the
    # SYMBROKEN one, then open the copy for update:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordinal_to_id = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordinal_to_id[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered changeset ids, indexed by ordinal:
    ordered_changesets = [
        ordinal_to_id[ordinal]
        for ordinal in range(len(ordinal_to_id))
        ]

    ordered_changeset_ids = set(ordinal_to_id.values())
    del ordinal_to_id

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    del self.ordinals

    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      target_id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(
          target_id, ordered_changeset_ids)
      if path:
        if Log().is_on(Log.DEBUG):
          Log().debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if Log().is_on(Log.DEBUG):
          Log().debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[target_id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(target_id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    Log().quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Yield every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    graph = ChangesetGraph(
        db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            )
        )
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(db):
      graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    graph.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Each output line is '<changeset-id> <timestamp>', both in hex:
    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    # Each line of the datafile is '<changeset-id> <timestamp>', both
    # fields written in hexadecimal.  Use open() (not the deprecated
    # file() constructor) and close the handle explicitly rather than
    # leaking it:
    f = open(
        artifact_manager.get_temp_file(
            config.CHANGESETS_SORTED_DATAFILE))
    for line in f:
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)
    f.close()

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    Log().normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    Log().quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
    del creator

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class SortSymbolOpeningsClosingsPass(Pass):
  """This pass was formerly known as pass6."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting symbolic name source revisions...")

    def sort_key(line):
      # Sort on (symbol id, svn revnum, rest-of-line); the first two
      # fields are numeric (the symbol id in hex):
      fields = line.split(' ', 2)
      return (int(fields[0], 16), int(fields[1]), fields[2])

    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED
            ),
        key=sort_key,
        tempdirs=[Ctx().tmpdir],
        )
    Log().quiet("Done")
class IndexSymbolsPass(Pass):
  """This pass was formerly known as pass7."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    # A map { symbol_id : offset of the symbol's first line }:
    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    old_id = None
    while True:
      # Record the position *before* reading, so it points at the
      # start of the line just read:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
      id, svn_revnum, ignored = line.split(" ", 2)
      id = int(id, 16)
      if id != old_id:
        # The input is sorted by symbol id, so the first line seen for
        # each id marks where that symbol's records begin:
        Log().verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
        old_id = id
        offsets[id] = fpos
    f.close()

    # Use open() rather than the deprecated file() constructor, for
    # consistency with the rest of this module:
    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    try:
      # -1 selects the highest available pickle protocol:
      cPickle.dump(offsets, offsets_db, -1)
    finally:
      offsets_db.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    Log().quiet("Done.")
class OutputPass(Pass):
  """This pass was formerly known as pass8."""

  def register_artifacts(self):
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # Emit the commits in revision-number order until the persistence
    # manager runs out of them:
    svn_revnum = 1
    while True:
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
      if not svn_commit:
        break
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_path_db.close()
1758 # The list of passes constituting a run of cvs2svn:
1759 passes = [
1760 CollectRevsPass(),
1761 CleanMetadataPass(),
1762 CollateSymbolsPass(),
1763 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1764 FilterSymbolsPass(),
1765 SortRevisionsPass(),
1766 SortSymbolsPass(),
1767 InitializeChangesetsPass(),
1768 #CheckIndexedItemStoreDependenciesPass(
1769 # config.CVS_ITEMS_SORTED_STORE,
1770 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1771 BreakRevisionChangesetCyclesPass(),
1772 RevisionTopologicalSortPass(),
1773 BreakSymbolChangesetCyclesPass(),
1774 BreakAllChangesetCyclesPass(),
1775 TopologicalSortPass(),
1776 CreateRevsPass(),
1777 SortSymbolOpeningsClosingsPass(),
1778 IndexSymbolsPass(),
1779 OutputPass(),