1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
import cPickle
import shutil

from cvs2svn_lib import config
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.common import warning_prefix
from cvs2svn_lib.common import FatalException
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import DB_OPEN_WRITE
from cvs2svn_lib.common import Timestamper
from cvs2svn_lib.sort import sort_file
from cvs2svn_lib.log import logger
from cvs2svn_lib.pass_manager import Pass
from cvs2svn_lib.serializer import PrimedPickleSerializer
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.cvs_path_database import CVSPathDatabase
from cvs2svn_lib.metadata_database import MetadataDatabase
from cvs2svn_lib.project import read_projects
from cvs2svn_lib.project import write_projects
from cvs2svn_lib.symbol import LineOfDevelopment
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.symbol import Symbol
from cvs2svn_lib.symbol import Branch
from cvs2svn_lib.symbol import Tag
from cvs2svn_lib.symbol import ExcludedSymbol
from cvs2svn_lib.symbol_database import SymbolDatabase
from cvs2svn_lib.symbol_database import create_symbol_database
from cvs2svn_lib.symbol_statistics import SymbolPlanError
from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
from cvs2svn_lib.symbol_statistics import SymbolStatistics
from cvs2svn_lib.cvs_item import CVSRevision
from cvs2svn_lib.cvs_item import CVSSymbol
from cvs2svn_lib.cvs_item_database import OldCVSItemStore
from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
from cvs2svn_lib.cvs_item_database import cvs_item_primer
from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
from cvs2svn_lib.key_generator import KeyGenerator
from cvs2svn_lib.changeset import RevisionChangeset
from cvs2svn_lib.changeset import OrderedChangeset
from cvs2svn_lib.changeset import SymbolChangeset
from cvs2svn_lib.changeset import BranchChangeset
from cvs2svn_lib.changeset import create_symbol_changeset
from cvs2svn_lib.changeset_graph import ChangesetGraph
from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
from cvs2svn_lib.changeset_database import ChangesetDatabase
from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
from cvs2svn_lib.svn_commit import SVNRevisionCommit
from cvs2svn_lib.openings_closings import SymbolingsLogger
from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
from cvs2svn_lib.persistence_manager import PersistenceManager
from cvs2svn_lib.repository_walker import walk_repository
from cvs2svn_lib.collect_data import CollectData
from cvs2svn_lib.check_dependencies_pass \
    import CheckItemStoreDependenciesPass
from cvs2svn_lib.check_dependencies_pass \
    import CheckIndexedItemStoreDependenciesPass
class CollectRevsPass(Pass):
  """This pass was formerly known as pass1.

  Walk the CVS repository (one project at a time), collecting file,
  symbol, and metadata information into the temporary databases that
  later passes consume."""

  def register_artifacts(self):
    # All artifacts below are *created* by this pass:
    self._register_temp_file(config.PROJECTS)
    self._register_temp_file(config.SYMBOL_STATISTICS)
    self._register_temp_file(config.METADATA_INDEX_TABLE)
    self._register_temp_file(config.METADATA_STORE)
    self._register_temp_file(config.CVS_PATHS_DB)
    self._register_temp_file(config.CVS_ITEMS_STORE)

  def run(self, run_options, stats_keeper):
    logger.quiet("Examining all CVS ',v' files...")
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_NEW)
    cd = CollectData(stats_keeper)

    # Key generator for CVSFiles:
    file_key_generator = KeyGenerator()

    for project in run_options.projects:
      Ctx()._projects[project.id] = project
      # Walk this project's repository subtree; fatal problems are
      # accumulated through cd.record_fatal_error rather than raised:
      walk_repository(project, file_key_generator, cd.record_fatal_error)

    run_options.projects = None

    fatal_errors = cd.close()
    if fatal_errors:
      # NOTE(review): the middle lines of this message were lost in
      # extraction and were reconstructed -- verify against upstream.
      raise FatalException("Pass 1 complete.\n"
                           + "=" * 75 + "\n"
                           + "Error summary:\n"
                           + "\n".join(fatal_errors) + "\n"
                           + "Exited due to fatal error(s).")

    Ctx()._cvs_path_db.close()
    write_projects(artifact_manager.get_temp_file(config.PROJECTS))
class CleanMetadataPass(Pass):
  """Clean up CVS revision metadata and write it to a new database."""

  def register_artifacts(self):
    self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_STORE)

  def _get_clean_author(self, author):
    """Return AUTHOR, converted appropriately to UTF8.

    Raise a UnicodeException if it cannot be converted using the
    configured cvs_author_decoder."""

    # Fast path: this author has already been processed (the cached
    # value may be the original name if conversion failed before):
    try:
      return self._authors[author]
    except KeyError:
      pass

    try:
      clean_author = Ctx().cvs_author_decoder(author)
    except UnicodeError:
      # Remember the failure so the warning is only emitted once:
      self._authors[author] = author
      raise UnicodeError('Problem decoding author \'%s\'' % (author,))

    try:
      clean_author = clean_author.encode('utf8')
    except UnicodeError:
      self._authors[author] = author
      raise UnicodeError('Problem encoding author \'%s\'' % (author,))

    self._authors[author] = clean_author
    return clean_author

  def _get_clean_log_msg(self, log_msg):
    """Return LOG_MSG, converted appropriately to UTF8.

    Raise a UnicodeException if it cannot be converted using the
    configured cvs_log_decoder."""

    try:
      clean_log_msg = Ctx().cvs_log_decoder(log_msg)
    except UnicodeError:
      raise UnicodeError(
          'Problem decoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

    try:
      return clean_log_msg.encode('utf8')
    except UnicodeError:
      raise UnicodeError(
          'Problem encoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

  def _clean_metadata(self, metadata):
    """Clean up METADATA by overwriting its members as necessary."""

    try:
      metadata.author = self._get_clean_author(metadata.author)
    except UnicodeError, e:
      logger.warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

    try:
      metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
    except UnicodeError, e:
      logger.warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

  def run(self, run_options, stats_keeper):
    logger.quiet("Converting metadata to UTF8...")
    metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_STORE),
        artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
        DB_OPEN_READ,
        )
    metadata_clean_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_NEW,
        )

    self.warnings = False

    # A map {author : clean_author} for those known (to avoid
    # repeating warnings):
    self._authors = {}

    for id in metadata_db.iterkeys():
      metadata = metadata_db[id]

      # Record the original author name because it might be needed for
      # expanding CVS keywords:
      metadata.original_author = metadata.author
      self._clean_metadata(metadata)
      metadata_clean_db[id] = metadata

    if self.warnings:
      raise FatalError(
          'There were warnings converting author names and/or log messages\n'
          'to Unicode (see messages above).  Please restart this pass\n'
          'with one or more \'--encoding\' parameters or with\n'
          '\'--fallback-encoding\'.'
          )

    metadata_clean_db.close()
    metadata_db.close()
class CollateSymbolsPass(Pass):
  """Divide symbols into branches, tags, and excludes."""

  # Map from symbol class to the name used for it in the
  # --symbol-info output file.  NOTE(review): entries other than
  # ExcludedSymbol were reconstructed -- verify against upstream.
  conversion_names = {
      Trunk : 'trunk',
      Branch : 'branch',
      Tag : 'tag',
      ExcludedSymbol : 'exclude',
      Symbol : '.',
      }

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_STATISTICS)

  def get_symbol(self, run_options, stats):
    """Use StrategyRules to decide what to do with a symbol.

    STATS is an instance of symbol_statistics._Stats describing an
    instance of Symbol or Trunk.  To determine how the symbol is to be
    converted, consult the StrategyRules in the project's
    symbol_strategy_rules.  Each rule is allowed a chance to change
    the way the symbol will be converted.  If the symbol is not a
    Trunk or TypedSymbol after all rules have run, raise
    IndeterminateSymbolException."""

    symbol = stats.lod
    rules = run_options.project_symbol_strategy_rules[symbol.project.id]
    for rule in rules:
      symbol = rule.get_symbol(symbol, stats)
      assert symbol is not None

    stats.check_valid(symbol)

    return symbol

  def log_symbol_summary(self, stats, symbol):
    # Write one summary line (plus comments) per symbol to the
    # --symbol-info file, if one was requested:
    if not self.symbol_info_file:
      return

    if isinstance(symbol, Trunk):
      name = '.trunk.'
      preferred_parent_name = '.'
    else:
      name = stats.lod.name
      if symbol.preferred_parent_id is None:
        preferred_parent_name = '.'
      else:
        preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
        if isinstance(preferred_parent, Trunk):
          preferred_parent_name = '.trunk.'
        else:
          preferred_parent_name = preferred_parent.name

    if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
      symbol_path = symbol.base_path
    else:
      symbol_path = '.'

    self.symbol_info_file.write(
        '%-5d %-30s %-10s %s %s\n' % (
            stats.lod.project.id,
            name,
            self.conversion_names[symbol.__class__],
            symbol_path,
            preferred_parent_name,
            )
        )
    self.symbol_info_file.write('  # %s\n' % (stats,))
    parent_counts = stats.possible_parents.items()
    if parent_counts:
      self.symbol_info_file.write('  # Possible parents:\n')
      # Sort by descending count, then by ascending name:
      parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
      for (pp, count) in parent_counts:
        if isinstance(pp, Trunk):
          self.symbol_info_file.write(
              '  #     .trunk. : %d\n' % (count,)
              )
        else:
          self.symbol_info_file.write(
              '  #     %s : %d\n' % (pp.name, count,)
              )

  def get_symbols(self, run_options):
    """Return a map telling how to convert symbols.

    The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
    indicating how each symbol should be converted.  Trunk objects in
    SYMBOL_STATS are passed through unchanged.  One object is included
    in the return value for each line of development described in
    SYMBOL_STATS.

    Raise FatalError if there was an error."""

    errors = []
    mismatches = []

    if Ctx().symbol_info_filename is not None:
      self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
      self.symbol_info_file.write(
          '# Columns: project_id symbol_name conversion symbol_path '
          'preferred_parent_name\n'
          )
    else:
      self.symbol_info_file = None

    # Initialize each symbol strategy rule a single time, even if it
    # is used in more than one project.  First define a map from
    # object id to symbol strategy rule:
    rules = {}
    for rule_list in run_options.project_symbol_strategy_rules:
      for rule in rule_list:
        rules[id(rule)] = rule

    for rule in rules.itervalues():
      rule.start(self.symbol_stats)

    retval = {}

    for stats in self.symbol_stats:
      try:
        symbol = self.get_symbol(run_options, stats)
      except IndeterminateSymbolException, e:
        self.log_symbol_summary(stats, stats.lod)
        mismatches.append(e.stats)
      except SymbolPlanError, e:
        self.log_symbol_summary(stats, stats.lod)
        errors.append(e)
      else:
        self.log_symbol_summary(stats, symbol)
        retval[stats.lod] = symbol

    for rule in rules.itervalues():
      rule.finish()

    if self.symbol_info_file:
      self.symbol_info_file.close()

    del self.symbol_info_file

    if errors or mismatches:
      s = ['Problems determining how symbols should be converted:\n']
      for e in errors:
        s.append('%s\n' % (e,))
      if mismatches:
        s.append(
            'It is not clear how the following symbols '
            'should be converted.\n'
            'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
            'and/or\n'
            '--symbol-default to resolve the ambiguity.\n'
            )
        for stats in mismatches:
          s.append('    %s\n' % (stats,))
      raise FatalError(''.join(s))
    else:
      return retval

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    self.symbol_stats = SymbolStatistics(
        artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
        )

    symbol_map = self.get_symbols(run_options)

    # Check the symbols for consistency and bail out if there were errors:
    self.symbol_stats.check_consistency(symbol_map)

    # Check that the symbols all have SVN paths set and that the paths
    # are disjoint:
    Ctx().output_option.check_symbols(symbol_map)

    for symbol in symbol_map.itervalues():
      if isinstance(symbol, ExcludedSymbol):
        self.symbol_stats.exclude_symbol(symbol)

    create_symbol_database(symbol_map.values())

    del self.symbol_stats

    logger.quiet("Done")
class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    self._register_temp_file(config.ITEM_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    # The pluggable revision collector may need artifacts of its own:
    Ctx().revision_collector.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._symbol_db = SymbolDatabase()
    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))

    # Persist the serializer so that later passes can deserialize the
    # sortable item datafiles written below:
    cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
    f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'wb')
    cPickle.dump(cvs_item_serializer, f, -1)
    f.close()

    rev_db = NewSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
        cvs_item_serializer,
        )

    symbol_db = NewSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
        cvs_item_serializer,
        )

    revision_collector = Ctx().revision_collector

    logger.quiet("Filtering out excluded symbols and summarizing items...")

    stats_keeper.reset_cvs_rev_info()
    revision_collector.start()

    # Process the cvs items store one file at a time:
    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      logger.verbose(cvs_file_items.cvs_file.rcs_path)
      cvs_file_items.filter_excluded_symbols()
      cvs_file_items.mutate_symbols()
      cvs_file_items.adjust_parents()
      cvs_file_items.refine_symbols()
      cvs_file_items.determine_revision_properties(
          Ctx().revision_property_setters
          )
      cvs_file_items.record_opened_symbols()
      cvs_file_items.record_closed_symbols()
      cvs_file_items.check_link_consistency()

      # Give the revision collector a chance to collect data about the
      # file:
      revision_collector.process_file(cvs_file_items)

      # Store whatever is left to the new file and update statistics:
      stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
      for cvs_item in cvs_file_items.values():
        stats_keeper.record_cvs_item(cvs_item)

        if isinstance(cvs_item, CVSRevision):
          rev_db.add(cvs_item)
        elif isinstance(cvs_item, CVSSymbol):
          symbol_db.add(cvs_item)

    stats_keeper.set_stats_reflect_exclude(True)

    rev_db.close()
    symbol_db.close()
    revision_collector.finish()
    cvs_item_store.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class SortRevisionsPass(Pass):
  """Sort the revisions file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_DATAFILE)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting CVS revision summaries...")
    # External merge sort of the revision summary datafile, using the
    # conversion's temporary directory for scratch space:
    sort_file(
        artifact_manager.get_temp_file(config.CVS_REVS_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_REVS_SORTED_DATAFILE
            ),
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class SortSymbolsPass(Pass):
  """Sort the symbols file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting CVS symbol summaries...")
    # External merge sort of the symbol summary datafile, using the
    # conversion's temporary directory for scratch space:
    sort_file(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SORTED_DATAFILE
            ),
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
    self._register_temp_file(config.CHANGESETS_STORE)
    self._register_temp_file(config.CHANGESETS_INDEX)
    self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.ITEM_SERIALIZER)
    self._register_temp_file_needed(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(
        config.CVS_SYMBOLS_SORTED_DATAFILE)

  def get_revision_changesets(self):
    """Generate revision changesets, one at a time.

    Each time, yield a list of CVSRevisions that might potentially
    consititute a changeset."""

    # Create changesets for CVSRevisions:
    old_metadata_id = None
    old_timestamp = None
    changeset_items = []

    db = OldSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(
            config.CVS_REVS_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_rev in db:
      # A new metadata id, or a gap longer than COMMIT_THRESHOLD,
      # ends the current candidate changeset:
      if cvs_rev.metadata_id != old_metadata_id \
         or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_metadata_id = cvs_rev.metadata_id
      changeset_items.append(cvs_rev)
      old_timestamp = cvs_rev.timestamp

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  def get_symbol_changesets(self):
    """Generate symbol changesets, one at a time.

    Each time, yield a list of CVSSymbols that might potentially
    consititute a changeset."""

    old_symbol_id = None
    changeset_items = []

    db = OldSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_symbol in db:
      if cvs_symbol.symbol.id != old_symbol_id:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_symbol_id = cvs_symbol.symbol.id
      changeset_items.append(cvs_symbol)

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  @staticmethod
  def compare_items(a, b):
    # Order CVSRevisions by timestamp, then path, then numeric
    # revision number components:
    return (
        cmp(a.timestamp, b.timestamp)
        or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
        or cmp([int(x) for x in a.rev.split('.')],
               [int(x) for x in b.rev.split('.')])
        )

  def break_internal_dependencies(self, changeset_items):
    """Split up CHANGESET_ITEMS if necessary to break internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could possibly
    belong in a single RevisionChangeset, but there might be internal
    dependencies among the items.  Return a list of lists, where each
    sublist is a list of CVSRevisions and at least one internal
    dependency has been eliminated.  Iff CHANGESET_ITEMS does not have
    to be split, then the return value will contain a single value,
    namely the original value of CHANGESET_ITEMS.  Split
    CHANGESET_ITEMS at most once, even though the resulting changesets
    might themselves have internal dependencies."""

    # We only look for succ dependencies, since by doing so we
    # automatically cover pred dependencies as well.  First create a
    # list of tuples (pred, succ) of id pairs for CVSItems that depend
    # on each other:
    dependencies = []
    changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
    for cvs_item in changeset_items:
      for next_id in cvs_item.get_succ_ids():
        if next_id in changeset_cvs_item_ids:
          # Sanity check: a CVSItem should never depend on itself:
          if next_id == cvs_item.id:
            raise InternalError('Item depends on itself: %s' % (cvs_item,))

          dependencies.append((cvs_item.id, next_id,))

    if dependencies:
      # Sort the changeset_items in a defined order (chronological to the
      # extent that the timestamps are correct and unique).
      changeset_items.sort(self.compare_items)
      indexes = {}
      for (i, changeset_item) in enumerate(changeset_items):
        indexes[changeset_item.id] = i

      # How many internal dependencies would be broken by breaking the
      # Changeset after a particular index?
      breaks = [0] * len(changeset_items)
      for (pred, succ,) in dependencies:
        pred_index = indexes[pred]
        succ_index = indexes[succ]
        breaks[min(pred_index, succ_index)] += 1
        breaks[max(pred_index, succ_index)] -= 1
      for i in range(1, len(breaks)):
        breaks[i] += breaks[i - 1]

      # Find the index that breaks the most dependencies, using the
      # timestamp gap to the next item as a tie-breaker:
      best_i = None
      best_count = -1
      best_gap = 0
      for i in range(0, len(breaks) - 1):
        gap = changeset_items[i + 1].timestamp - changeset_items[i].timestamp
        if breaks[i] > best_count:
          best_i = i
          best_count = breaks[i]
          best_gap = gap
        elif breaks[i] == best_count and gap < best_gap:
          best_i = i
          best_count = breaks[i]
          best_gap = gap

      return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
    else:
      return [changeset_items]

  def break_all_internal_dependencies(self, changeset_items):
    """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
    be part of a single changeset.  Break this list into sublists,
    where the CVSRevisions in each sublist are free of mutual
    dependencies."""

    # This method is written non-recursively to avoid any possible
    # problems with recursion depth.

    changesets_to_split = [changeset_items]
    while changesets_to_split:
      changesets = self.break_internal_dependencies(changesets_to_split.pop())
      if len(changesets) == 1:
        [changeset_items] = changesets
        yield changeset_items
      else:
        # The changeset had to be split; see if either of the
        # fragments have to be split:
        changesets.reverse()
        changesets_to_split.extend(changesets)

  def get_changesets(self):
    """Generate (Changeset, [CVSItem,...]) for all changesets.

    The Changesets already have their internal dependencies broken.
    The [CVSItem,...] list is the list of CVSItems in the
    corresponding Changeset."""

    for changeset_items in self.get_revision_changesets():
      for split_changeset_items \
              in self.break_all_internal_dependencies(changeset_items):
        yield (
            RevisionChangeset(
                self.changeset_key_generator.gen_id(),
                [cvs_rev.id for cvs_rev in split_changeset_items]
                ),
            split_changeset_items,
            )

    for changeset_items in self.get_symbol_changesets():
      yield (
          create_symbol_changeset(
              self.changeset_key_generator.gen_id(),
              changeset_items[0].symbol,
              [cvs_symbol.id for cvs_symbol in changeset_items]
              ),
          changeset_items,
          )

  def run(self, run_options, stats_keeper):
    logger.quiet("Creating preliminary commit sets...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    # Load the serializer written by FilterSymbolsPass:
    f = open(artifact_manager.get_temp_file(config.ITEM_SERIALIZER), 'rb')
    self.cvs_item_serializer = cPickle.load(f)
    f.close()

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_NEW,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        DB_OPEN_NEW,
        )

    self.sorted_cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_NEW)

    self.changeset_key_generator = KeyGenerator()

    for (changeset, changeset_items) in self.get_changesets():
      if logger.is_on(logger.DEBUG):
        logger.debug(repr(changeset))
      changeset_db.store(changeset)
      for cvs_item in changeset_items:
        self.sorted_cvs_items_db.add(cvs_item)
        cvs_item_to_changeset_id[cvs_item.id] = changeset.id

    self.sorted_cvs_items_db.close()
    cvs_item_to_changeset_id.close()
    changeset_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    del self.cvs_item_serializer

    logger.quiet("Done")
class ProcessedChangesetLogger:
  """Accumulate changeset ids and emit them in batched debug output."""

  def __init__(self):
    # Changeset ids consumed since the last flush():
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    # Only bother recording if debug output is enabled:
    if logger.is_on(logger.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    if self.processed_changeset_ids:
      logger.debug(
          'Consumed changeset ids %s'
          % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))

      del self.processed_changeset_ids[:]
class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_STORE)
    self._register_temp_file_needed(config.CHANGESETS_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)

  def get_source_changesets(self):
    # Yield every changeset from the previous pass's database:
    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking revision changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the REVBROKEN table with a copy of the previous pass's
    # table, then open it for in-place update.
    # NOTE(review): the copy call itself was lost in extraction; the
    # shutil.copyfile reconstruction should be verified upstream.
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      # Only RevisionChangesets participate in this pass's graph:
      if isinstance(changeset, RevisionChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  Also convert them to OrderedChangesets, without changing their ids."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self, changeset_db):
    # Yield every changeset stored in CHANGESET_DB:
    changeset_ids = changeset_db.keys()

    for changeset_id in changeset_ids:
      yield changeset_db[changeset_id]

  def get_changesets(self):
    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_READ,
        )

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN
                ),
            DB_OPEN_READ,
            )
        )

    # RevisionChangesets are ordered via the graph; other changesets
    # are passed through unchanged:
    for changeset in self.get_source_changesets(changeset_db):
      if isinstance(changeset, RevisionChangeset):
        changeset_graph.add_changeset(changeset)
      else:
        yield changeset

    changeset_ids = []

    # Sentinel so the first OrderedChangeset has no predecessor:
    changeset_ids.append(None)

    for (changeset, time_range) in changeset_graph.consume_graph():
      changeset_ids.append(changeset.id)

    # Sentinel so the last OrderedChangeset has no successor:
    changeset_ids.append(None)

    for i in range(1, len(changeset_ids) - 1):
      changeset = changeset_db[changeset_ids[i]]
      yield OrderedChangeset(
          changeset.id, changeset.cvs_item_ids, i - 1,
          changeset_ids[i - 1], changeset_ids[i + 1])

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    changesets_revordered_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW)

    for changeset in self.get_changesets():
      changesets_revordered_db.store(changeset)

    changesets_revordered_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    """Declare the artifacts this pass creates and consumes."""

    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Generate the changesets produced by RevisionTopologicalSortPass."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Find the "cheapest" link to break, as defined by
    # ChangesetGraphLink comparison:
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the SYMBROKEN table with the REVBROKEN mapping, then open it
    # for update so break_cycle() can record the split changesets:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      # Only SymbolChangesets participate in this cycle-breaking:
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # New changesets created by splitting get ids above the old range:
    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    """Declare the artifacts this pass creates and consumes."""

    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Generate the changesets produced by BreakSymbolChangesetCyclesPass."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets."""

    logger.debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        # Unordered predecessors count as ordinal 0:
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        # Unordered successors count as ordinal infinity:
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      # Each individual item must itself be non-retrograde:
      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Split the items into those that must come before the earliest
    # successor ("early") and the rest ("late"):
    early_item_ids = []
    late_item_ids = []
    for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(id)
      else:
        early_item_ids.append(id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    # The late changeset might still be retrograde; recurse:
    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split the changeset if it is retrograde; return True iff split.

    A changeset is "retrograde" if one of its ordered predecessors has
    an ordinal at least as large as that of one of its ordered
    successors."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[id]
        for id in node.pred_ids
        if id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[id]
        for id in node.succ_ids
        if id in self.ordinals
        ]
    succ_ordinals.sort()
    if pred_ordinals and succ_ordinals \
           and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    # Find the "cheapest" interior link to break:
    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    . Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle.  Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the ALLBROKEN table with the SYMBROKEN mapping, then open it
    # for update so the splitting routines can record new changesets:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      id = ordered_changeset_map[ordinal]
      ordered_changesets.append(id)

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    # The next ordinal position that still has to leave the graph:
    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
      if path:
        if logger.is_on(logger.DEBUG):
          logger.debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if logger.is_on(logger.DEBUG):
          logger.debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    logger.quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    """Declare the artifacts this pass creates and consumes."""

    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate all changesets stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            )
        )
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    # One "<changeset-id-hex> <timestamp-hex>" record per line:
    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    """Declare the artifacts this pass creates and consumes."""

    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    # Replay the order recorded by TopologicalSortPass; each line is
    # "<changeset-id-hex> <timestamp-hex>":
    for line in open(
            artifact_manager.get_temp_file(
                config.CHANGESETS_SORTED_DATAFILE)):
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    logger.normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        logger.verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    logger.quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class SortSymbolOpeningsClosingsPass(Pass):
  """This pass was formerly known as pass6."""

  def register_artifacts(self):
    """Declare the artifacts this pass creates and consumes."""

    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting symbolic name source revisions...")

    def sort_key(line):
      # Records look like "<symbol-id-hex> <svn-revnum> <rest>"; sort
      # numerically by symbol id, then revnum, then the remainder:
      line = line.split(' ', 2)
      return (int(line[0], 16), int(line[1]), line[2],)

    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED
            ),
        key=sort_key,
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class IndexSymbolsPass(Pass):
  """This pass was formerly known as pass7."""

  def register_artifacts(self):
    """Declare the artifacts this pass creates and consumes."""

    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    # Map { symbol_id : first file offset of its records }:
    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    old_id = None
    while True:
      # Record the offset BEFORE reading, so it points at the line:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
      id, svn_revnum, ignored = line.split(" ", 2)
      id = int(id, 16)
      if id != old_id:
        logger.verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
        old_id = id
        offsets[id] = fpos
    f.close()

    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    # Protocol -1 == highest available pickle protocol:
    cPickle.dump(offsets, offsets_db, -1)
    offsets_db.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    logger.quiet("Done.")
class OutputPass(Pass):
  """This pass was formerly known as pass8."""

  def register_artifacts(self):
    """Declare the artifacts this pass consumes.

    The output option contributes its own artifact requirements."""

    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # Emit SVNCommits in revision-number order until the persistence
    # manager runs out of commits:
    svn_revnum = 1
    svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
    while svn_commit:
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_path_db.close()
1773 # The list of passes constituting a run of cvs2svn:
1776 CleanMetadataPass(),
1777 CollateSymbolsPass(),
1778 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1779 FilterSymbolsPass(),
1780 SortRevisionsPass(),
1782 InitializeChangesetsPass(),
1783 #CheckIndexedItemStoreDependenciesPass(
1784 # config.CVS_ITEMS_SORTED_STORE,
1785 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1786 BreakRevisionChangesetCyclesPass(),
1787 RevisionTopologicalSortPass(),
1788 BreakSymbolChangesetCyclesPass(),
1789 BreakAllChangesetCyclesPass(),
1790 TopologicalSortPass(),
1792 SortSymbolOpeningsClosingsPass(),