1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
24 from cvs2svn_lib
import config
25 from cvs2svn_lib
.context
import Ctx
26 from cvs2svn_lib
.common
import warning_prefix
27 from cvs2svn_lib
.common
import FatalException
28 from cvs2svn_lib
.common
import FatalError
29 from cvs2svn_lib
.common
import InternalError
30 from cvs2svn_lib
.common
import DB_OPEN_NEW
31 from cvs2svn_lib
.common
import DB_OPEN_READ
32 from cvs2svn_lib
.common
import DB_OPEN_WRITE
33 from cvs2svn_lib
.common
import Timestamper
34 from cvs2svn_lib
.sort
import sort_file
35 from cvs2svn_lib
.log
import logger
36 from cvs2svn_lib
.pass_manager
import Pass
37 from cvs2svn_lib
.serializer
import PrimedPickleSerializer
38 from cvs2svn_lib
.artifact_manager
import artifact_manager
39 from cvs2svn_lib
.cvs_path_database
import CVSPathDatabase
40 from cvs2svn_lib
.metadata_database
import MetadataDatabase
41 from cvs2svn_lib
.project
import read_projects
42 from cvs2svn_lib
.project
import write_projects
43 from cvs2svn_lib
.symbol
import LineOfDevelopment
44 from cvs2svn_lib
.symbol
import Trunk
45 from cvs2svn_lib
.symbol
import Symbol
46 from cvs2svn_lib
.symbol
import Branch
47 from cvs2svn_lib
.symbol
import Tag
48 from cvs2svn_lib
.symbol
import ExcludedSymbol
49 from cvs2svn_lib
.symbol_database
import SymbolDatabase
50 from cvs2svn_lib
.symbol_database
import create_symbol_database
51 from cvs2svn_lib
.symbol_statistics
import SymbolPlanError
52 from cvs2svn_lib
.symbol_statistics
import IndeterminateSymbolException
53 from cvs2svn_lib
.symbol_statistics
import SymbolStatistics
54 from cvs2svn_lib
.cvs_item
import CVSRevision
55 from cvs2svn_lib
.cvs_item
import CVSSymbol
56 from cvs2svn_lib
.cvs_item_database
import OldCVSItemStore
57 from cvs2svn_lib
.cvs_item_database
import IndexedCVSItemStore
58 from cvs2svn_lib
.cvs_item_database
import cvs_item_primer
59 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib
.key_generator
import KeyGenerator
64 from cvs2svn_lib
.changeset
import RevisionChangeset
65 from cvs2svn_lib
.changeset
import OrderedChangeset
66 from cvs2svn_lib
.changeset
import SymbolChangeset
67 from cvs2svn_lib
.changeset
import BranchChangeset
68 from cvs2svn_lib
.changeset
import create_symbol_changeset
69 from cvs2svn_lib
.changeset_graph
import ChangesetGraph
70 from cvs2svn_lib
.changeset_graph_link
import ChangesetGraphLink
71 from cvs2svn_lib
.changeset_database
import ChangesetDatabase
72 from cvs2svn_lib
.changeset_database
import CVSItemToChangesetTable
73 from cvs2svn_lib
.svn_commit
import SVNRevisionCommit
74 from cvs2svn_lib
.openings_closings
import SymbolingsLogger
75 from cvs2svn_lib
.svn_commit_creator
import SVNCommitCreator
76 from cvs2svn_lib
.persistence_manager
import PersistenceManager
77 from cvs2svn_lib
.repository_walker
import walk_repository
78 from cvs2svn_lib
.collect_data
import CollectData
79 from cvs2svn_lib
.check_dependencies_pass \
80 import CheckItemStoreDependenciesPass
81 from cvs2svn_lib
.check_dependencies_pass \
82 import CheckIndexedItemStoreDependenciesPass
class CollectRevsPass(Pass):
  """This pass was formerly known as pass1."""

  def register_artifacts(self):
    # Artifacts created by this pass; later passes declare them as needed:
    self._register_temp_file(config.PROJECTS)
    self._register_temp_file(config.SYMBOL_STATISTICS)
    self._register_temp_file(config.METADATA_INDEX_TABLE)
    self._register_temp_file(config.METADATA_STORE)
    self._register_temp_file(config.CVS_PATHS_DB)
    self._register_temp_file(config.CVS_ITEMS_STORE)
96 def run(self
, run_options
, stats_keeper
):
97 logger
.quiet("Examining all CVS ',v' files...")
99 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_NEW
)
100 cd
= CollectData(stats_keeper
)
102 # Key generator for CVSFiles:
103 file_key_generator
= KeyGenerator()
105 for project
in run_options
.projects
:
106 Ctx()._projects
[project
.id] = project
109 walk_repository(project
, file_key_generator
, cd
.record_fatal_error
),
111 run_options
.projects
= None
113 fatal_errors
= cd
.close()
116 raise FatalException("Pass 1 complete.\n"
119 + "\n".join(fatal_errors
) + "\n"
120 + "Exited due to fatal error(s).")
122 Ctx()._cvs
_path
_db
.close()
123 write_projects(artifact_manager
.get_temp_file(config
.PROJECTS
))
class CleanMetadataPass(Pass):
  """Clean up CVS revision metadata and write it to a new database."""

  def register_artifacts(self):
    self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_STORE)
136 def _get_clean_author(self
, author
):
137 """Return AUTHOR, converted appropriately to UTF8.
139 Raise a UnicodeException if it cannot be converted using the
140 configured cvs_author_decoder."""
143 return self
._authors
[author
]
148 clean_author
= Ctx().cvs_author_decoder(author
)
150 self
._authors
[author
] = author
151 raise UnicodeError('Problem decoding author \'%s\'' % (author
,))
154 clean_author
= clean_author
.encode('utf8')
156 self
._authors
[author
] = author
157 raise UnicodeError('Problem encoding author \'%s\'' % (author
,))
159 self
._authors
[author
] = clean_author
162 def _get_clean_log_msg(self
, log_msg
):
163 """Return LOG_MSG, converted appropriately to UTF8.
165 Raise a UnicodeException if it cannot be converted using the
166 configured cvs_log_decoder."""
169 clean_log_msg
= Ctx().cvs_log_decoder(log_msg
)
172 'Problem decoding log message:\n'
176 % ('-' * 75, log_msg
, '-' * 75,)
180 return clean_log_msg
.encode('utf8')
183 'Problem encoding log message:\n'
187 % ('-' * 75, log_msg
, '-' * 75,)
190 def _clean_metadata(self
, metadata
):
191 """Clean up METADATA by overwriting its members as necessary."""
194 metadata
.author
= self
._get
_clean
_author
(metadata
.author
)
195 except UnicodeError, e
:
196 logger
.warn('%s: %s' % (warning_prefix
, e
,))
200 metadata
.log_msg
= self
._get
_clean
_log
_msg
(metadata
.log_msg
)
201 except UnicodeError, e
:
202 logger
.warn('%s: %s' % (warning_prefix
, e
,))
205 def run(self
, run_options
, stats_keeper
):
206 logger
.quiet("Converting metadata to UTF8...")
207 metadata_db
= MetadataDatabase(
208 artifact_manager
.get_temp_file(config
.METADATA_STORE
),
209 artifact_manager
.get_temp_file(config
.METADATA_INDEX_TABLE
),
212 metadata_clean_db
= MetadataDatabase(
213 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
214 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
218 self
.warnings
= False
220 # A map {author : clean_author} for those known (to avoid
221 # repeating warnings):
224 for id in metadata_db
.iterkeys():
225 metadata
= metadata_db
[id]
227 # Record the original author name because it might be needed for
228 # expanding CVS keywords:
229 metadata
.original_author
= metadata
.author
231 self
._clean
_metadata
(metadata
)
233 metadata_clean_db
[id] = metadata
237 'There were warnings converting author names and/or log messages\n'
238 'to Unicode (see messages above). Please restart this pass\n'
239 'with one or more \'--encoding\' parameters or with\n'
240 '\'--fallback-encoding\'.'
243 metadata_clean_db
.close()
class CollateSymbolsPass(Pass):
  """Divide symbols into branches, tags, and excludes."""

  # Map from symbol class to the name used for it in the
  # --write-symbol-info output.  NOTE(review): only the ExcludedSymbol
  # entry is visible in the fragmentary source; the remaining entries
  # are reconstructed and should be confirmed against the project.
  conversion_names = {
      Trunk : 'trunk',
      Branch : 'branch',
      Tag : 'tag',
      ExcludedSymbol : 'exclude',
      Symbol : '.',
      }

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_STATISTICS)
264 def get_symbol(self
, run_options
, stats
):
265 """Use StrategyRules to decide what to do with a symbol.
267 STATS is an instance of symbol_statistics._Stats describing an
268 instance of Symbol or Trunk. To determine how the symbol is to be
269 converted, consult the StrategyRules in the project's
270 symbol_strategy_rules. Each rule is allowed a chance to change
271 the way the symbol will be converted. If the symbol is not a
272 Trunk or TypedSymbol after all rules have run, raise
273 IndeterminateSymbolException."""
276 rules
= run_options
.project_symbol_strategy_rules
[symbol
.project
.id]
278 symbol
= rule
.get_symbol(symbol
, stats
)
279 assert symbol
is not None
281 stats
.check_valid(symbol
)
285 def log_symbol_summary(self
, stats
, symbol
):
286 if not self
.symbol_info_file
:
289 if isinstance(symbol
, Trunk
):
291 preferred_parent_name
= '.'
293 name
= stats
.lod
.name
294 if symbol
.preferred_parent_id
is None:
295 preferred_parent_name
= '.'
297 preferred_parent
= self
.symbol_stats
[symbol
.preferred_parent_id
].lod
298 if isinstance(preferred_parent
, Trunk
):
299 preferred_parent_name
= '.trunk.'
301 preferred_parent_name
= preferred_parent
.name
303 if isinstance(symbol
, LineOfDevelopment
) and symbol
.base_path
:
304 symbol_path
= symbol
.base_path
308 self
.symbol_info_file
.write(
309 '%-5d %-30s %-10s %s %s\n' % (
310 stats
.lod
.project
.id,
312 self
.conversion_names
[symbol
.__class
__],
314 preferred_parent_name
,
317 self
.symbol_info_file
.write(' # %s\n' % (stats
,))
318 parent_counts
= stats
.possible_parents
.items()
320 self
.symbol_info_file
.write(' # Possible parents:\n')
321 parent_counts
.sort(lambda a
,b
: cmp((b
[1], a
[0]), (a
[1], b
[0])))
322 for (pp
, count
) in parent_counts
:
323 if isinstance(pp
, Trunk
):
324 self
.symbol_info_file
.write(
325 ' # .trunk. : %d\n' % (count
,)
328 self
.symbol_info_file
.write(
329 ' # %s : %d\n' % (pp
.name
, count
,)
332 def get_symbols(self
, run_options
):
333 """Return a map telling how to convert symbols.
335 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
336 indicating how each symbol should be converted. Trunk objects in
337 SYMBOL_STATS are passed through unchanged. One object is included
338 in the return value for each line of development described in
341 Raise FatalError if there was an error."""
346 if Ctx().symbol_info_filename
is not None:
347 self
.symbol_info_file
= open(Ctx().symbol_info_filename
, 'w')
348 self
.symbol_info_file
.write(
349 '# Columns: project_id symbol_name conversion symbol_path '
350 'preferred_parent_name\n'
353 self
.symbol_info_file
= None
355 # Initialize each symbol strategy rule a single time, even if it
356 # is used in more than one project. First define a map from
357 # object id to symbol strategy rule:
359 for rule_list
in run_options
.project_symbol_strategy_rules
:
360 for rule
in rule_list
:
361 rules
[id(rule
)] = rule
363 for rule
in rules
.itervalues():
364 rule
.start(self
.symbol_stats
)
368 for stats
in self
.symbol_stats
:
370 symbol
= self
.get_symbol(run_options
, stats
)
371 except IndeterminateSymbolException
, e
:
372 self
.log_symbol_summary(stats
, stats
.lod
)
373 mismatches
.append(e
.stats
)
374 except SymbolPlanError
, e
:
375 self
.log_symbol_summary(stats
, stats
.lod
)
378 self
.log_symbol_summary(stats
, symbol
)
379 retval
[stats
.lod
] = symbol
381 for rule
in rules
.itervalues():
384 if self
.symbol_info_file
:
385 self
.symbol_info_file
.close()
387 del self
.symbol_info_file
389 if errors
or mismatches
:
390 s
= ['Problems determining how symbols should be converted:\n']
392 s
.append('%s\n' % (e
,))
395 'It is not clear how the following symbols '
396 'should be converted.\n'
397 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
399 '--symbol-default to resolve the ambiguity.\n'
401 for stats
in mismatches
:
402 s
.append(' %s\n' % (stats
,))
403 raise FatalError(''.join(s
))
407 def run(self
, run_options
, stats_keeper
):
408 Ctx()._projects
= read_projects(
409 artifact_manager
.get_temp_file(config
.PROJECTS
)
411 self
.symbol_stats
= SymbolStatistics(
412 artifact_manager
.get_temp_file(config
.SYMBOL_STATISTICS
)
415 symbol_map
= self
.get_symbols(run_options
)
417 # Check the symbols for consistency and bail out if there were errors:
418 self
.symbol_stats
.check_consistency(symbol_map
)
420 # Check that the symbols all have SVN paths set and that the paths
422 Ctx().output_option
.check_symbols(symbol_map
)
424 for symbol
in symbol_map
.itervalues():
425 if isinstance(symbol
, ExcludedSymbol
):
426 self
.symbol_stats
.exclude_symbol(symbol
)
428 create_symbol_database(symbol_map
.values())
430 del self
.symbol_stats
class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    self._register_temp_file(config.ITEM_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    # The configured revision collector may need artifacts of its own:
    Ctx().revision_collector.register_artifacts(self)
451 def run(self
, run_options
, stats_keeper
):
452 Ctx()._projects
= read_projects(
453 artifact_manager
.get_temp_file(config
.PROJECTS
)
455 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
456 Ctx()._symbol
_db
= SymbolDatabase()
457 cvs_item_store
= OldCVSItemStore(
458 artifact_manager
.get_temp_file(config
.CVS_ITEMS_STORE
))
460 cvs_item_serializer
= PrimedPickleSerializer(cvs_item_primer
)
461 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'wb')
462 cPickle
.dump(cvs_item_serializer
, f
, -1)
465 rev_db
= NewSortableCVSRevisionDatabase(
466 artifact_manager
.get_temp_file(config
.CVS_REVS_DATAFILE
),
470 symbol_db
= NewSortableCVSSymbolDatabase(
471 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_DATAFILE
),
475 revision_collector
= Ctx().revision_collector
477 logger
.quiet("Filtering out excluded symbols and summarizing items...")
479 stats_keeper
.reset_cvs_rev_info()
480 revision_collector
.start()
482 # Process the cvs items store one file at a time:
483 for cvs_file_items
in cvs_item_store
.iter_cvs_file_items():
484 logger
.verbose(cvs_file_items
.cvs_file
.rcs_path
)
485 cvs_file_items
.filter_excluded_symbols()
486 cvs_file_items
.mutate_symbols()
487 cvs_file_items
.adjust_parents()
488 cvs_file_items
.refine_symbols()
489 cvs_file_items
.determine_revision_properties(
490 Ctx().revision_property_setters
492 cvs_file_items
.record_opened_symbols()
493 cvs_file_items
.record_closed_symbols()
494 cvs_file_items
.check_link_consistency()
496 # Give the revision collector a chance to collect data about the
498 revision_collector
.process_file(cvs_file_items
)
500 # Store whatever is left to the new file and update statistics:
501 stats_keeper
.record_cvs_file(cvs_file_items
.cvs_file
)
502 for cvs_item
in cvs_file_items
.values():
503 stats_keeper
.record_cvs_item(cvs_item
)
505 if isinstance(cvs_item
, CVSRevision
):
507 elif isinstance(cvs_item
, CVSSymbol
):
508 symbol_db
.add(cvs_item
)
510 stats_keeper
.set_stats_reflect_exclude(True)
514 revision_collector
.finish()
515 cvs_item_store
.close()
516 Ctx()._symbol
_db
.close()
517 Ctx()._cvs
_path
_db
.close()
class SortRevisionsPass(Pass):
  """Sort the revisions file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_DATAFILE)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting CVS revision summaries...")
    # External sort of the revision summaries into commit-friendly order:
    sort_file(
        artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_REVS_SORTED_DATAFILE
            ),
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class SortSymbolsPass(Pass):
  """Sort the symbols file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting CVS symbol summaries...")
    # External sort of the symbol summaries, grouping items by symbol:
    sort_file(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SORTED_DATAFILE
            ),
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
    self._register_temp_file(config.CHANGESETS_STORE)
    self._register_temp_file(config.CHANGESETS_INDEX)
    self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.ITEM_SERIALIZER)
    self._register_temp_file_needed(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(
        config.CVS_SYMBOLS_SORTED_DATAFILE)
577 def get_revision_changesets(self
):
578 """Generate revision changesets, one at a time.
580 Each time, yield a list of CVSRevisions that might potentially
581 consititute a changeset."""
583 # Create changesets for CVSRevisions:
584 old_metadata_id
= None
588 db
= OldSortableCVSRevisionDatabase(
589 artifact_manager
.get_temp_file(
590 config
.CVS_REVS_SORTED_DATAFILE
592 self
.cvs_item_serializer
,
596 if cvs_rev
.metadata_id
!= old_metadata_id \
597 or cvs_rev
.timestamp
> old_timestamp
+ config
.COMMIT_THRESHOLD
:
598 # Start a new changeset. First finish up the old changeset,
601 yield changeset_items
603 old_metadata_id
= cvs_rev
.metadata_id
604 changeset_items
.append(cvs_rev
)
605 old_timestamp
= cvs_rev
.timestamp
607 # Finish up the last changeset, if any:
609 yield changeset_items
611 def get_symbol_changesets(self
):
612 """Generate symbol changesets, one at a time.
614 Each time, yield a list of CVSSymbols that might potentially
615 consititute a changeset."""
620 db
= OldSortableCVSSymbolDatabase(
621 artifact_manager
.get_temp_file(
622 config
.CVS_SYMBOLS_SORTED_DATAFILE
624 self
.cvs_item_serializer
,
627 for cvs_symbol
in db
:
628 if cvs_symbol
.symbol
.id != old_symbol_id
:
629 # Start a new changeset. First finish up the old changeset,
632 yield changeset_items
634 old_symbol_id
= cvs_symbol
.symbol
.id
635 changeset_items
.append(cvs_symbol
)
637 # Finish up the last changeset, if any:
639 yield changeset_items
642 def compare_items(a
, b
):
644 cmp(a
.timestamp
, b
.timestamp
)
645 or cmp(a
.cvs_file
.cvs_path
, b
.cvs_file
.cvs_path
)
646 or cmp([int(x
) for x
in a
.rev
.split('.')],
647 [int(x
) for x
in b
.rev
.split('.')])
650 def break_internal_dependencies(self
, changeset_items
):
651 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
653 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
654 belong in a single RevisionChangeset, but there might be internal
655 dependencies among the items. Return a list of lists, where each
656 sublist is a list of CVSRevisions and at least one internal
657 dependency has been eliminated. Iff CHANGESET_ITEMS does not have
658 to be split, then the return value will contain a single value,
659 namely the original value of CHANGESET_ITEMS. Split
660 CHANGESET_ITEMS at most once, even though the resulting changesets
661 might themselves have internal dependencies."""
663 # We only look for succ dependencies, since by doing so we
664 # automatically cover pred dependencies as well. First create a
665 # list of tuples (pred, succ) of id pairs for CVSItems that depend
668 changeset_cvs_item_ids
= set([cvs_rev
.id for cvs_rev
in changeset_items
])
669 for cvs_item
in changeset_items
:
670 for next_id
in cvs_item
.get_succ_ids():
671 if next_id
in changeset_cvs_item_ids
:
672 # Sanity check: a CVSItem should never depend on itself:
673 if next_id
== cvs_item
.id:
674 raise InternalError('Item depends on itself: %s' % (cvs_item
,))
676 dependencies
.append((cvs_item
.id, next_id
,))
679 # Sort the changeset_items in a defined order (chronological to the
680 # extent that the timestamps are correct and unique).
681 changeset_items
.sort(self
.compare_items
)
683 for (i
, changeset_item
) in enumerate(changeset_items
):
684 indexes
[changeset_item
.id] = i
685 # How many internal dependencies would be broken by breaking the
686 # Changeset after a particular index?
687 breaks
= [0] * len(changeset_items
)
688 for (pred
, succ
,) in dependencies
:
689 pred_index
= indexes
[pred
]
690 succ_index
= indexes
[succ
]
691 breaks
[min(pred_index
, succ_index
)] += 1
692 breaks
[max(pred_index
, succ_index
)] -= 1
696 for i
in range(1, len(breaks
)):
697 breaks
[i
] += breaks
[i
- 1]
698 for i
in range(0, len(breaks
) - 1):
699 if breaks
[i
] > best_count
:
701 best_count
= breaks
[i
]
702 best_time
= (changeset_items
[i
+ 1].timestamp
703 - changeset_items
[i
].timestamp
)
704 elif breaks
[i
] == best_count \
705 and (changeset_items
[i
+ 1].timestamp
706 - changeset_items
[i
].timestamp
) < best_time
:
708 best_count
= breaks
[i
]
709 best_time
= (changeset_items
[i
+ 1].timestamp
710 - changeset_items
[i
].timestamp
)
711 # Reuse the old changeset.id for the first of the split changesets.
712 return [changeset_items
[:best_i
+ 1], changeset_items
[best_i
+ 1:]]
714 return [changeset_items
]
716 def break_all_internal_dependencies(self
, changeset_items
):
717 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
719 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
720 be part of a single changeset. Break this list into sublists,
721 where the CVSRevisions in each sublist are free of mutual
724 # This method is written non-recursively to avoid any possible
725 # problems with recursion depth.
727 changesets_to_split
= [changeset_items
]
728 while changesets_to_split
:
729 changesets
= self
.break_internal_dependencies(changesets_to_split
.pop())
730 if len(changesets
) == 1:
731 [changeset_items
] = changesets
732 yield changeset_items
734 # The changeset had to be split; see if either of the
735 # fragments have to be split:
737 changesets_to_split
.extend(changesets
)
739 def get_changesets(self
):
740 """Generate (Changeset, [CVSItem,...]) for all changesets.
742 The Changesets already have their internal dependencies broken.
743 The [CVSItem,...] list is the list of CVSItems in the
744 corresponding Changeset."""
746 for changeset_items
in self
.get_revision_changesets():
747 for split_changeset_items \
748 in self
.break_all_internal_dependencies(changeset_items
):
751 self
.changeset_key_generator
.gen_id(),
752 [cvs_rev
.id for cvs_rev
in split_changeset_items
]
754 split_changeset_items
,
757 for changeset_items
in self
.get_symbol_changesets():
759 create_symbol_changeset(
760 self
.changeset_key_generator
.gen_id(),
761 changeset_items
[0].symbol
,
762 [cvs_symbol
.id for cvs_symbol
in changeset_items
]
767 def run(self
, run_options
, stats_keeper
):
768 logger
.quiet("Creating preliminary commit sets...")
770 Ctx()._projects
= read_projects(
771 artifact_manager
.get_temp_file(config
.PROJECTS
)
773 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
774 Ctx()._symbol
_db
= SymbolDatabase()
776 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'rb')
777 self
.cvs_item_serializer
= cPickle
.load(f
)
780 changeset_db
= ChangesetDatabase(
781 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
782 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
785 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
786 artifact_manager
.get_temp_file(config
.CVS_ITEM_TO_CHANGESET
),
790 self
.sorted_cvs_items_db
= IndexedCVSItemStore(
791 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
792 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
795 self
.changeset_key_generator
= KeyGenerator()
797 for (changeset
, changeset_items
) in self
.get_changesets():
798 if logger
.is_on(logger
.DEBUG
):
799 logger
.debug(repr(changeset
))
800 changeset_db
.store(changeset
)
801 for cvs_item
in changeset_items
:
802 self
.sorted_cvs_items_db
.add(cvs_item
)
803 cvs_item_to_changeset_id
[cvs_item
.id] = changeset
.id
805 self
.sorted_cvs_items_db
.close()
806 cvs_item_to_changeset_id
.close()
808 Ctx()._symbol
_db
.close()
809 Ctx()._cvs
_path
_db
.close()
811 del self
.cvs_item_serializer
class ProcessedChangesetLogger:
  """Accumulate processed changeset ids and emit them in batched DEBUG logs."""

  def __init__(self):
    # Ids recorded since the last flush():
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    # Only collect ids if DEBUG output is enabled (otherwise they
    # would never be emitted):
    if logger.is_on(logger.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    if self.processed_changeset_ids:
      logger.debug(
          'Consumed changeset ids %s'
          % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))

      del self.processed_changeset_ids[:]
class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_STORE)
    self._register_temp_file_needed(config.CHANGESETS_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)
849 def get_source_changesets(self
):
850 old_changeset_db
= ChangesetDatabase(
851 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
852 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
855 changeset_ids
= old_changeset_db
.keys()
857 for changeset_id
in changeset_ids
:
858 yield old_changeset_db
[changeset_id
]
860 old_changeset_db
.close()
863 def break_cycle(self
, cycle
):
864 """Break up one or more changesets in CYCLE to help break the cycle.
866 CYCLE is a list of Changesets where
868 cycle[i] depends on cycle[i - 1]
870 Break up one or more changesets in CYCLE to make progress towards
871 breaking the cycle. Update self.changeset_graph accordingly.
873 It is not guaranteed that the cycle will be broken by one call to
874 this routine, but at least some progress must be made."""
876 self
.processed_changeset_logger
.flush()
879 for i
in range(len(cycle
)):
880 # It's OK if this index wraps to -1:
881 link
= ChangesetGraphLink(
882 cycle
[i
- 1], cycle
[i
], cycle
[i
+ 1 - len(cycle
)])
884 if best_i
is None or link
< best_link
:
888 if logger
.is_on(logger
.DEBUG
):
890 'Breaking cycle %s by breaking node %x' % (
891 ' -> '.join(['%x' % node
.id for node
in (cycle
+ [cycle
[0]])]),
892 best_link
.changeset
.id,))
894 new_changesets
= best_link
.break_changeset(self
.changeset_key_generator
)
896 self
.changeset_graph
.delete_changeset(best_link
.changeset
)
898 for changeset
in new_changesets
:
899 self
.changeset_graph
.add_new_changeset(changeset
)
901 def run(self
, run_options
, stats_keeper
):
902 logger
.quiet("Breaking revision changeset dependency cycles...")
904 Ctx()._projects
= read_projects(
905 artifact_manager
.get_temp_file(config
.PROJECTS
)
907 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
908 Ctx()._symbol
_db
= SymbolDatabase()
909 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
910 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
911 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
915 artifact_manager
.get_temp_file(
916 config
.CVS_ITEM_TO_CHANGESET
),
917 artifact_manager
.get_temp_file(
918 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
))
919 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
920 artifact_manager
.get_temp_file(
921 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
),
924 changeset_db
= ChangesetDatabase(
925 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
926 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
929 self
.changeset_graph
= ChangesetGraph(
930 changeset_db
, cvs_item_to_changeset_id
934 for changeset
in self
.get_source_changesets():
935 changeset_db
.store(changeset
)
936 if isinstance(changeset
, RevisionChangeset
):
937 self
.changeset_graph
.add_changeset(changeset
)
938 max_changeset_id
= max(max_changeset_id
, changeset
.id)
940 self
.changeset_key_generator
= KeyGenerator(max_changeset_id
+ 1)
942 self
.processed_changeset_logger
= ProcessedChangesetLogger()
944 # Consume the graph, breaking cycles using self.break_cycle():
945 for (changeset
, time_range
) in self
.changeset_graph
.consume_graph(
946 cycle_breaker
=self
.break_cycle
948 self
.processed_changeset_logger
.log(changeset
.id)
950 self
.processed_changeset_logger
.flush()
951 del self
.processed_changeset_logger
953 self
.changeset_graph
.close()
954 self
.changeset_graph
= None
955 Ctx()._cvs
_items
_db
.close()
956 Ctx()._symbol
_db
.close()
957 Ctx()._cvs
_path
_db
.close()
class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  Also convert them to OrderedChangesets, without changing their ids."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
979 def get_source_changesets(self
, changeset_db
):
980 changeset_ids
= changeset_db
.keys()
982 for changeset_id
in changeset_ids
:
983 yield changeset_db
[changeset_id
]
985 def get_changesets(self
):
986 changeset_db
= ChangesetDatabase(
987 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
988 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
992 changeset_graph
= ChangesetGraph(
994 CVSItemToChangesetTable(
995 artifact_manager
.get_temp_file(
996 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
1002 for changeset
in self
.get_source_changesets(changeset_db
):
1003 if isinstance(changeset
, RevisionChangeset
):
1004 changeset_graph
.add_changeset(changeset
)
1011 changeset_ids
.append(None)
1013 for (changeset
, time_range
) in changeset_graph
.consume_graph():
1014 changeset_ids
.append(changeset
.id)
1017 changeset_ids
.append(None)
1019 for i
in range(1, len(changeset_ids
) - 1):
1020 changeset
= changeset_db
[changeset_ids
[i
]]
1021 yield OrderedChangeset(
1022 changeset
.id, changeset
.cvs_item_ids
, i
- 1,
1023 changeset_ids
[i
- 1], changeset_ids
[i
+ 1])
1025 changeset_graph
.close()
1027 def run(self
, run_options
, stats_keeper
):
1028 logger
.quiet("Generating CVSRevisions in commit order...")
1030 Ctx()._projects
= read_projects(
1031 artifact_manager
.get_temp_file(config
.PROJECTS
)
1033 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
1034 Ctx()._symbol
_db
= SymbolDatabase()
1035 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1036 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1037 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1040 changesets_revordered_db
= ChangesetDatabase(
1041 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_STORE
),
1042 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_INDEX
),
1045 for changeset
in self
.get_changesets():
1046 changesets_revordered_db
.store(changeset
)
1048 changesets_revordered_db
.close()
1049 Ctx()._cvs
_items
_db
.close()
1050 Ctx()._symbol
_db
.close()
1051 Ctx()._cvs
_path
_db
.close()
1053 logger
.quiet("Done")
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Generate every changeset from the REVSORTED database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    # Find the link whose removal is cheapest (ChangesetGraphLink
    # instances are ordered by their cost of breaking):
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the SYMBROKEN mapping with a copy of the REVBROKEN one,
    # then open it for update:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      # Only SymbolChangesets take part in this pass's cycle analysis:
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Generate every changeset from the SYMBROKEN database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets.

    A changeset is "retrograde" if it has a predecessor with an
    ordinal at least as large as one of its successors' ordinals, so
    it cannot be scheduled between them.  Partition its items into an
    'early' and a 'late' changeset so that each half is schedulable,
    then recurse in case the late half is still retrograde."""

    logger.debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      # Each individual item must be schedulable on its own:
      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Items whose predecessors reach past the earliest successor must
    # go in the late half; the rest can go in the early half:
    early_item_ids = []
    late_item_ids = []
    for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(id)
      else:
        early_item_ids.append(id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split the changeset with id CHANGESET_ID if it is retrograde.

    Return True iff a split was necessary."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[id]
        for id in node.pred_ids
        if id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[id]
        for id in node.succ_ids
        if id in self.ordinals
        ]
    succ_ordinals.sort()
    if pred_ordinals and succ_ordinals \
           and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    # Choose the cheapest link to break (ChangesetGraphLink instances
    # are ordered by their cost of breaking):
    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    .  Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle.  Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the ALLBROKEN mapping with a copy of the SYMBROKEN one,
    # then open it for update:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      id = ordered_changeset_map[ordinal]
      ordered_changesets.append(id)

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
      if path:
        if logger.is_on(logger.DEBUG):
          logger.debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if logger.is_on(logger.DEBUG):
          logger.debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    logger.quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ,
        )

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            )
        )
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # One "<changeset-id-hex> <timestamp-hex>" line per changeset:
    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ,
        )

    # Each line of the datafile is "<changeset-id-hex> <timestamp-hex>":
    for line in open(
            artifact_manager.get_temp_file(
                config.CHANGESETS_SORTED_DATAFILE)):
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    logger.normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        logger.verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    logger.quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class SortSymbolOpeningsClosingsPass(Pass):
  """Sort the symbol openings/closings file by symbol id.

  This pass was formerly known as pass6."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting symbolic name source revisions...")

    def sort_key(line):
      # Each line is "<symbol-id-hex> <svn-revnum-dec> <rest>"; sort
      # numerically on the first two fields, then textually:
      line = line.split(' ', 2)
      return (int(line[0], 16), int(line[1]), line[2],)

    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED
            ),
        key=sort_key,
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class IndexSymbolsPass(Pass):
  """This pass was formerly known as pass7."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    # NOTE(review): most of this method's body was lost in extraction;
    # reconstructed from the docstring and the surviving fragments —
    # confirm against upstream before relying on details.

    # Map {symbol id : offset of its first line in the sorted file}:
    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    old_id = None
    while True:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
      id, svn_revnum, ignored = line.split(" ", 2)
      id = int(id, 16)
      if id != old_id:
        logger.verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
        old_id = id
        offsets[id] = fpos
    f.close()

    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    cPickle.dump(offsets, offsets_db, -1)
    offsets_db.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    logger.quiet("Done.")
class OutputPass(Pass):
  """This pass was formerly known as pass8."""

  def register_artifacts(self):
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    # The output option may need artifacts of its own:
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # Output the commits in revision-number order until the
    # persistence manager runs out of commits.  NOTE(review): the loop
    # scaffolding was lost in extraction; only the two get_svn_commit
    # calls and the output call survive — confirm against upstream.
    svn_revnum = 1
    svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
    while svn_commit:
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_path_db.close()
1767 # The list of passes constituting a run of cvs2svn:
1770 CleanMetadataPass(),
1771 CollateSymbolsPass(),
1772 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1773 FilterSymbolsPass(),
1774 SortRevisionsPass(),
1776 InitializeChangesetsPass(),
1777 #CheckIndexedItemStoreDependenciesPass(
1778 # config.CVS_ITEMS_SORTED_STORE,
1779 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1780 BreakRevisionChangesetCyclesPass(),
1781 RevisionTopologicalSortPass(),
1782 BreakSymbolChangesetCyclesPass(),
1783 BreakAllChangesetCyclesPass(),
1784 TopologicalSortPass(),
1786 SortSymbolOpeningsClosingsPass(),