1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
24 from cvs2svn_lib
import config
25 from cvs2svn_lib
.context
import Ctx
26 from cvs2svn_lib
.common
import warning_prefix
27 from cvs2svn_lib
.common
import FatalException
28 from cvs2svn_lib
.common
import FatalError
29 from cvs2svn_lib
.common
import InternalError
30 from cvs2svn_lib
.common
import DB_OPEN_NEW
31 from cvs2svn_lib
.common
import DB_OPEN_READ
32 from cvs2svn_lib
.common
import DB_OPEN_WRITE
33 from cvs2svn_lib
.common
import Timestamper
34 from cvs2svn_lib
.sort
import sort_file
35 from cvs2svn_lib
.log
import logger
36 from cvs2svn_lib
.pass_manager
import Pass
37 from cvs2svn_lib
.serializer
import PrimedPickleSerializer
38 from cvs2svn_lib
.artifact_manager
import artifact_manager
39 from cvs2svn_lib
.cvs_path_database
import CVSPathDatabase
40 from cvs2svn_lib
.metadata_database
import MetadataDatabase
41 from cvs2svn_lib
.project
import read_projects
42 from cvs2svn_lib
.project
import write_projects
43 from cvs2svn_lib
.symbol
import LineOfDevelopment
44 from cvs2svn_lib
.symbol
import Trunk
45 from cvs2svn_lib
.symbol
import Symbol
46 from cvs2svn_lib
.symbol
import Branch
47 from cvs2svn_lib
.symbol
import Tag
48 from cvs2svn_lib
.symbol
import ExcludedSymbol
49 from cvs2svn_lib
.symbol_database
import SymbolDatabase
50 from cvs2svn_lib
.symbol_database
import create_symbol_database
51 from cvs2svn_lib
.symbol_statistics
import SymbolPlanError
52 from cvs2svn_lib
.symbol_statistics
import IndeterminateSymbolException
53 from cvs2svn_lib
.symbol_statistics
import SymbolStatistics
54 from cvs2svn_lib
.cvs_item
import CVSRevision
55 from cvs2svn_lib
.cvs_item
import CVSSymbol
56 from cvs2svn_lib
.cvs_item_database
import OldCVSItemStore
57 from cvs2svn_lib
.cvs_item_database
import IndexedCVSItemStore
58 from cvs2svn_lib
.cvs_item_database
import cvs_item_primer
59 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib
.key_generator
import KeyGenerator
64 from cvs2svn_lib
.changeset
import RevisionChangeset
65 from cvs2svn_lib
.changeset
import OrderedChangeset
66 from cvs2svn_lib
.changeset
import SymbolChangeset
67 from cvs2svn_lib
.changeset
import BranchChangeset
68 from cvs2svn_lib
.changeset
import create_symbol_changeset
69 from cvs2svn_lib
.changeset_graph
import ChangesetGraph
70 from cvs2svn_lib
.changeset_graph_link
import ChangesetGraphLink
71 from cvs2svn_lib
.changeset_database
import ChangesetDatabase
72 from cvs2svn_lib
.changeset_database
import CVSItemToChangesetTable
73 from cvs2svn_lib
.svn_commit
import SVNRevisionCommit
74 from cvs2svn_lib
.openings_closings
import SymbolingsLogger
75 from cvs2svn_lib
.svn_commit_creator
import SVNCommitCreator
76 from cvs2svn_lib
.persistence_manager
import PersistenceManager
77 from cvs2svn_lib
.repository_walker
import walk_repository
78 from cvs2svn_lib
.collect_data
import CollectData
79 from cvs2svn_lib
.check_dependencies_pass \
80 import CheckItemStoreDependenciesPass
81 from cvs2svn_lib
.check_dependencies_pass \
82 import CheckIndexedItemStoreDependenciesPass
85 class CollectRevsPass(Pass
):
86 """This pass was formerly known as pass1."""
88 def register_artifacts(self
):
89 self
._register
_temp
_file
(config
.PROJECTS
)
90 self
._register
_temp
_file
(config
.SYMBOL_STATISTICS
)
91 self
._register
_temp
_file
(config
.METADATA_INDEX_TABLE
)
92 self
._register
_temp
_file
(config
.METADATA_STORE
)
93 self
._register
_temp
_file
(config
.CVS_PATHS_DB
)
94 self
._register
_temp
_file
(config
.CVS_ITEMS_STORE
)
96 def run(self
, run_options
, stats_keeper
):
97 logger
.quiet("Examining all CVS ',v' files...")
99 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_NEW
)
100 cd
= CollectData(stats_keeper
)
102 # Key generator for CVSFiles:
103 file_key_generator
= KeyGenerator()
105 for project
in run_options
.projects
:
106 Ctx()._projects
[project
.id] = project
109 walk_repository(project
, file_key_generator
, cd
.record_fatal_error
),
111 run_options
.projects
= None
113 fatal_errors
= cd
.close()
116 raise FatalException("Pass 1 complete.\n"
119 + "\n".join(fatal_errors
) + "\n"
120 + "Exited due to fatal error(s).")
122 Ctx()._cvs
_path
_db
.close()
123 write_projects(artifact_manager
.get_temp_file(config
.PROJECTS
))
127 class CleanMetadataPass(Pass
):
128 """Clean up CVS revision metadata and write it to a new database."""
130 def register_artifacts(self
):
131 self
._register
_temp
_file
(config
.METADATA_CLEAN_INDEX_TABLE
)
132 self
._register
_temp
_file
(config
.METADATA_CLEAN_STORE
)
133 self
._register
_temp
_file
_needed
(config
.METADATA_INDEX_TABLE
)
134 self
._register
_temp
_file
_needed
(config
.METADATA_STORE
)
136 def _get_clean_author(self
, author
):
137 """Return AUTHOR, converted appropriately to UTF8.
139 Raise a UnicodeException if it cannot be converted using the
140 configured cvs_author_decoder."""
143 return self
._authors
[author
]
148 clean_author
= Ctx().cvs_author_decoder(author
)
150 self
._authors
[author
] = author
151 raise UnicodeError('Problem decoding author \'%s\'' % (author
,))
154 clean_author
= clean_author
.encode('utf8')
156 self
._authors
[author
] = author
157 raise UnicodeError('Problem encoding author \'%s\'' % (author
,))
159 self
._authors
[author
] = clean_author
162 def _get_clean_log_msg(self
, log_msg
):
163 """Return LOG_MSG, converted appropriately to UTF8.
165 Raise a UnicodeException if it cannot be converted using the
166 configured cvs_log_decoder."""
169 clean_log_msg
= Ctx().cvs_log_decoder(log_msg
)
172 'Problem decoding log message:\n'
176 % ('-' * 75, log_msg
, '-' * 75,)
180 return clean_log_msg
.encode('utf8')
183 'Problem encoding log message:\n'
187 % ('-' * 75, log_msg
, '-' * 75,)
190 def _clean_metadata(self
, metadata
):
191 """Clean up METADATA by overwriting its members as necessary."""
194 metadata
.author
= self
._get
_clean
_author
(metadata
.author
)
195 except UnicodeError, e
:
196 logger
.warn('%s: %s' % (warning_prefix
, e
,))
200 metadata
.log_msg
= self
._get
_clean
_log
_msg
(metadata
.log_msg
)
201 except UnicodeError, e
:
202 logger
.warn('%s: %s' % (warning_prefix
, e
,))
205 def run(self
, run_options
, stats_keeper
):
206 logger
.quiet("Converting metadata to UTF8...")
207 metadata_db
= MetadataDatabase(
208 artifact_manager
.get_temp_file(config
.METADATA_STORE
),
209 artifact_manager
.get_temp_file(config
.METADATA_INDEX_TABLE
),
212 metadata_clean_db
= MetadataDatabase(
213 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
214 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
218 self
.warnings
= False
220 # A map {author : clean_author} for those known (to avoid
221 # repeating warnings):
224 for id in metadata_db
.iterkeys():
225 metadata
= metadata_db
[id]
227 # Record the original author name because it might be needed for
228 # expanding CVS keywords:
229 metadata
.original_author
= metadata
.author
231 self
._clean
_metadata
(metadata
)
233 metadata_clean_db
[id] = metadata
237 'There were warnings converting author names and/or log messages\n'
238 'to Unicode (see messages above). Please restart this pass\n'
239 'with one or more \'--encoding\' parameters or with\n'
240 '\'--fallback-encoding\'.'
243 metadata_clean_db
.close()
248 class CollateSymbolsPass(Pass
):
249 """Divide symbols into branches, tags, and excludes."""
255 ExcludedSymbol
: 'exclude',
  def register_artifacts(self):
    """Register the symbol database as output and this pass's inputs."""

    # Output: the database describing how each symbol is converted.
    self._register_temp_file(config.SYMBOL_DB)
    # Inputs produced by CollectRevsPass:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_STATISTICS)
264 def get_symbol(self
, run_options
, stats
):
265 """Use StrategyRules to decide what to do with a symbol.
267 STATS is an instance of symbol_statistics._Stats describing an
268 instance of Symbol or Trunk. To determine how the symbol is to be
269 converted, consult the StrategyRules in the project's
270 symbol_strategy_rules. Each rule is allowed a chance to change
271 the way the symbol will be converted. If the symbol is not a
272 Trunk or TypedSymbol after all rules have run, raise
273 IndeterminateSymbolException."""
276 rules
= run_options
.project_symbol_strategy_rules
[symbol
.project
.id]
278 symbol
= rule
.get_symbol(symbol
, stats
)
279 assert symbol
is not None
281 stats
.check_valid(symbol
)
285 def log_symbol_summary(self
, stats
, symbol
):
286 if not self
.symbol_info_file
:
289 if isinstance(symbol
, Trunk
):
291 preferred_parent_name
= '.'
293 name
= stats
.lod
.name
294 if symbol
.preferred_parent_id
is None:
295 preferred_parent_name
= '.'
297 preferred_parent
= self
.symbol_stats
[symbol
.preferred_parent_id
].lod
298 if isinstance(preferred_parent
, Trunk
):
299 preferred_parent_name
= '.trunk.'
301 preferred_parent_name
= preferred_parent
.name
303 if isinstance(symbol
, LineOfDevelopment
) and symbol
.base_path
:
304 symbol_path
= symbol
.base_path
308 self
.symbol_info_file
.write(
309 '%-5d %-30s %-10s %s %s\n' % (
310 stats
.lod
.project
.id,
312 self
.conversion_names
[symbol
.__class
__],
314 preferred_parent_name
,
317 self
.symbol_info_file
.write(' # %s\n' % (stats
,))
318 parent_counts
= stats
.possible_parents
.items()
320 self
.symbol_info_file
.write(' # Possible parents:\n')
321 parent_counts
.sort(lambda a
,b
: cmp((b
[1], a
[0]), (a
[1], b
[0])))
322 for (pp
, count
) in parent_counts
:
323 if isinstance(pp
, Trunk
):
324 self
.symbol_info_file
.write(
325 ' # .trunk. : %d\n' % (count
,)
328 self
.symbol_info_file
.write(
329 ' # %s : %d\n' % (pp
.name
, count
,)
332 def get_symbols(self
, run_options
):
333 """Return a map telling how to convert symbols.
335 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
336 indicating how each symbol should be converted. Trunk objects in
337 SYMBOL_STATS are passed through unchanged. One object is included
338 in the return value for each line of development described in
341 Raise FatalError if there was an error."""
346 if Ctx().symbol_info_filename
is not None:
347 self
.symbol_info_file
= open(Ctx().symbol_info_filename
, 'w')
348 self
.symbol_info_file
.write(
349 '# Columns: project_id symbol_name conversion symbol_path '
350 'preferred_parent_name\n'
353 self
.symbol_info_file
= None
355 # Initialize each symbol strategy rule a single time, even if it
356 # is used in more than one project. First define a map from
357 # object id to symbol strategy rule:
359 for rule_list
in run_options
.project_symbol_strategy_rules
:
360 for rule
in rule_list
:
361 rules
[id(rule
)] = rule
363 for rule
in rules
.itervalues():
364 rule
.start(self
.symbol_stats
)
368 for stats
in self
.symbol_stats
:
370 symbol
= self
.get_symbol(run_options
, stats
)
371 except IndeterminateSymbolException
, e
:
372 self
.log_symbol_summary(stats
, stats
.lod
)
373 mismatches
.append(e
.stats
)
374 except SymbolPlanError
, e
:
375 self
.log_symbol_summary(stats
, stats
.lod
)
378 self
.log_symbol_summary(stats
, symbol
)
379 retval
[stats
.lod
] = symbol
381 for rule
in rules
.itervalues():
384 if self
.symbol_info_file
:
385 self
.symbol_info_file
.close()
387 del self
.symbol_info_file
389 if errors
or mismatches
:
390 s
= ['Problems determining how symbols should be converted:\n']
392 s
.append('%s\n' % (e
,))
395 'It is not clear how the following symbols '
396 'should be converted.\n'
397 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
399 '--symbol-default to resolve the ambiguity.\n'
401 for stats
in mismatches
:
402 s
.append(' %s\n' % (stats
,))
403 raise FatalError(''.join(s
))
407 def run(self
, run_options
, stats_keeper
):
408 Ctx()._projects
= read_projects(
409 artifact_manager
.get_temp_file(config
.PROJECTS
)
411 self
.symbol_stats
= SymbolStatistics(
412 artifact_manager
.get_temp_file(config
.SYMBOL_STATISTICS
)
415 symbol_map
= self
.get_symbols(run_options
)
417 # Check the symbols for consistency and bail out if there were errors:
418 self
.symbol_stats
.check_consistency(symbol_map
)
420 # Check that the symbols all have SVN paths set and that the paths
422 Ctx().output_option
.check_symbols(symbol_map
)
424 for symbol
in symbol_map
.itervalues():
425 if isinstance(symbol
, ExcludedSymbol
):
426 self
.symbol_stats
.exclude_symbol(symbol
)
428 create_symbol_database(symbol_map
.values())
430 del self
.symbol_stats
435 class FilterSymbolsPass(Pass
):
436 """Delete any branches/tags that are to be excluded.
438 Also delete revisions on excluded branches, and delete other
439 references to the excluded symbols."""
441 def register_artifacts(self
):
442 self
._register
_temp
_file
(config
.ITEM_SERIALIZER
)
443 self
._register
_temp
_file
(config
.CVS_REVS_DATAFILE
)
444 self
._register
_temp
_file
(config
.CVS_SYMBOLS_DATAFILE
)
445 self
._register
_temp
_file
_needed
(config
.PROJECTS
)
446 self
._register
_temp
_file
_needed
(config
.SYMBOL_DB
)
447 self
._register
_temp
_file
_needed
(config
.METADATA_CLEAN_STORE
)
448 self
._register
_temp
_file
_needed
(config
.METADATA_CLEAN_INDEX_TABLE
)
449 self
._register
_temp
_file
_needed
(config
.CVS_PATHS_DB
)
450 self
._register
_temp
_file
_needed
(config
.CVS_ITEMS_STORE
)
451 Ctx().revision_collector
.register_artifacts(self
)
453 def run(self
, run_options
, stats_keeper
):
454 Ctx()._projects
= read_projects(
455 artifact_manager
.get_temp_file(config
.PROJECTS
)
457 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
458 Ctx()._metadata
_db
= MetadataDatabase(
459 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
460 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
463 Ctx()._symbol
_db
= SymbolDatabase()
464 cvs_item_store
= OldCVSItemStore(
465 artifact_manager
.get_temp_file(config
.CVS_ITEMS_STORE
))
467 cvs_item_serializer
= PrimedPickleSerializer(cvs_item_primer
)
468 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'wb')
469 cPickle
.dump(cvs_item_serializer
, f
, -1)
472 rev_db
= NewSortableCVSRevisionDatabase(
473 artifact_manager
.get_temp_file(config
.CVS_REVS_DATAFILE
),
477 symbol_db
= NewSortableCVSSymbolDatabase(
478 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_DATAFILE
),
482 revision_collector
= Ctx().revision_collector
484 logger
.quiet("Filtering out excluded symbols and summarizing items...")
486 stats_keeper
.reset_cvs_rev_info()
487 revision_collector
.start()
489 # Process the cvs items store one file at a time:
490 for cvs_file_items
in cvs_item_store
.iter_cvs_file_items():
491 logger
.verbose(cvs_file_items
.cvs_file
.rcs_path
)
492 cvs_file_items
.filter_excluded_symbols()
493 cvs_file_items
.mutate_symbols()
494 cvs_file_items
.adjust_parents()
495 cvs_file_items
.refine_symbols()
496 cvs_file_items
.determine_revision_properties(
497 Ctx().revision_property_setters
499 cvs_file_items
.record_opened_symbols()
500 cvs_file_items
.record_closed_symbols()
501 cvs_file_items
.check_link_consistency()
503 # Give the revision collector a chance to collect data about the
505 revision_collector
.process_file(cvs_file_items
)
507 # Store whatever is left to the new file and update statistics:
508 stats_keeper
.record_cvs_file(cvs_file_items
.cvs_file
)
509 for cvs_item
in cvs_file_items
.values():
510 stats_keeper
.record_cvs_item(cvs_item
)
512 if isinstance(cvs_item
, CVSRevision
):
514 elif isinstance(cvs_item
, CVSSymbol
):
515 symbol_db
.add(cvs_item
)
517 stats_keeper
.set_stats_reflect_exclude(True)
521 revision_collector
.finish()
522 cvs_item_store
.close()
523 Ctx()._symbol
_db
.close()
524 Ctx()._cvs
_path
_db
.close()
529 class SortRevisionsPass(Pass
):
530 """Sort the revisions file."""
  def register_artifacts(self):
    """Register the sorted revision file (output) and its unsorted input."""

    # Output: the sorted CVS revision summaries.
    self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
    # Input: the unsorted revision summaries (written by FilterSymbolsPass).
    self._register_temp_file_needed(config.CVS_REVS_DATAFILE)
536 def run(self
, run_options
, stats_keeper
):
537 logger
.quiet("Sorting CVS revision summaries...")
539 artifact_manager
.get_temp_file(config
.CVS_REVS_DATAFILE
),
540 artifact_manager
.get_temp_file(
541 config
.CVS_REVS_SORTED_DATAFILE
543 tempdirs
=[Ctx().tmpdir
],
548 class SortSymbolsPass(Pass
):
549 """Sort the symbols file."""
  def register_artifacts(self):
    """Register the sorted symbol file (output) and its unsorted input."""

    # Output: the sorted CVS symbol summaries.
    self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
    # Input: the unsorted symbol summaries (written by FilterSymbolsPass).
    self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)
555 def run(self
, run_options
, stats_keeper
):
556 logger
.quiet("Sorting CVS symbol summaries...")
558 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_DATAFILE
),
559 artifact_manager
.get_temp_file(
560 config
.CVS_SYMBOLS_SORTED_DATAFILE
562 tempdirs
=[Ctx().tmpdir
],
567 class InitializeChangesetsPass(Pass
):
568 """Create preliminary CommitSets."""
570 def register_artifacts(self
):
571 self
._register
_temp
_file
(config
.CVS_ITEM_TO_CHANGESET
)
572 self
._register
_temp
_file
(config
.CHANGESETS_STORE
)
573 self
._register
_temp
_file
(config
.CHANGESETS_INDEX
)
574 self
._register
_temp
_file
(config
.CVS_ITEMS_SORTED_STORE
)
575 self
._register
_temp
_file
(config
.CVS_ITEMS_SORTED_INDEX_TABLE
)
576 self
._register
_temp
_file
_needed
(config
.PROJECTS
)
577 self
._register
_temp
_file
_needed
(config
.SYMBOL_DB
)
578 self
._register
_temp
_file
_needed
(config
.CVS_PATHS_DB
)
579 self
._register
_temp
_file
_needed
(config
.ITEM_SERIALIZER
)
580 self
._register
_temp
_file
_needed
(config
.CVS_REVS_SORTED_DATAFILE
)
581 self
._register
_temp
_file
_needed
(
582 config
.CVS_SYMBOLS_SORTED_DATAFILE
)
584 def get_revision_changesets(self
):
585 """Generate revision changesets, one at a time.
587 Each time, yield a list of CVSRevisions that might potentially
588 consititute a changeset."""
590 # Create changesets for CVSRevisions:
591 old_metadata_id
= None
595 db
= OldSortableCVSRevisionDatabase(
596 artifact_manager
.get_temp_file(
597 config
.CVS_REVS_SORTED_DATAFILE
599 self
.cvs_item_serializer
,
603 if cvs_rev
.metadata_id
!= old_metadata_id \
604 or cvs_rev
.timestamp
> old_timestamp
+ config
.COMMIT_THRESHOLD
:
605 # Start a new changeset. First finish up the old changeset,
608 yield changeset_items
610 old_metadata_id
= cvs_rev
.metadata_id
611 changeset_items
.append(cvs_rev
)
612 old_timestamp
= cvs_rev
.timestamp
614 # Finish up the last changeset, if any:
616 yield changeset_items
618 def get_symbol_changesets(self
):
619 """Generate symbol changesets, one at a time.
621 Each time, yield a list of CVSSymbols that might potentially
622 consititute a changeset."""
627 db
= OldSortableCVSSymbolDatabase(
628 artifact_manager
.get_temp_file(
629 config
.CVS_SYMBOLS_SORTED_DATAFILE
631 self
.cvs_item_serializer
,
634 for cvs_symbol
in db
:
635 if cvs_symbol
.symbol
.id != old_symbol_id
:
636 # Start a new changeset. First finish up the old changeset,
639 yield changeset_items
641 old_symbol_id
= cvs_symbol
.symbol
.id
642 changeset_items
.append(cvs_symbol
)
644 # Finish up the last changeset, if any:
646 yield changeset_items
649 def compare_items(a
, b
):
651 cmp(a
.timestamp
, b
.timestamp
)
652 or cmp(a
.cvs_file
.cvs_path
, b
.cvs_file
.cvs_path
)
653 or cmp([int(x
) for x
in a
.rev
.split('.')],
654 [int(x
) for x
in b
.rev
.split('.')])
657 def break_internal_dependencies(self
, changeset_items
):
658 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
660 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
661 belong in a single RevisionChangeset, but there might be internal
662 dependencies among the items. Return a list of lists, where each
663 sublist is a list of CVSRevisions and at least one internal
664 dependency has been eliminated. Iff CHANGESET_ITEMS does not have
665 to be split, then the return value will contain a single value,
666 namely the original value of CHANGESET_ITEMS. Split
667 CHANGESET_ITEMS at most once, even though the resulting changesets
668 might themselves have internal dependencies."""
670 # We only look for succ dependencies, since by doing so we
671 # automatically cover pred dependencies as well. First create a
672 # list of tuples (pred, succ) of id pairs for CVSItems that depend
675 changeset_cvs_item_ids
= set([cvs_rev
.id for cvs_rev
in changeset_items
])
676 for cvs_item
in changeset_items
:
677 for next_id
in cvs_item
.get_succ_ids():
678 if next_id
in changeset_cvs_item_ids
:
679 # Sanity check: a CVSItem should never depend on itself:
680 if next_id
== cvs_item
.id:
681 raise InternalError('Item depends on itself: %s' % (cvs_item
,))
683 dependencies
.append((cvs_item
.id, next_id
,))
686 # Sort the changeset_items in a defined order (chronological to the
687 # extent that the timestamps are correct and unique).
688 changeset_items
.sort(self
.compare_items
)
690 for (i
, changeset_item
) in enumerate(changeset_items
):
691 indexes
[changeset_item
.id] = i
693 # How many internal dependencies would be broken by breaking the
694 # Changeset after a particular index?
695 breaks
= [0] * len(changeset_items
)
696 for (pred
, succ
,) in dependencies
:
697 pred_index
= indexes
[pred
]
698 succ_index
= indexes
[succ
]
699 breaks
[min(pred_index
, succ_index
)] += 1
700 breaks
[max(pred_index
, succ_index
)] -= 1
701 for i
in range(1, len(breaks
)):
702 breaks
[i
] += breaks
[i
- 1]
707 for i
in range(0, len(breaks
) - 1):
708 if breaks
[i
] > best_count
:
710 best_count
= breaks
[i
]
711 best_time
= (changeset_items
[i
+ 1].timestamp
712 - changeset_items
[i
].timestamp
)
713 elif breaks
[i
] == best_count \
714 and (changeset_items
[i
+ 1].timestamp
715 - changeset_items
[i
].timestamp
) < best_time
:
717 best_count
= breaks
[i
]
718 best_time
= (changeset_items
[i
+ 1].timestamp
719 - changeset_items
[i
].timestamp
)
720 # Reuse the old changeset.id for the first of the split changesets.
721 return [changeset_items
[:best_i
+ 1], changeset_items
[best_i
+ 1:]]
723 return [changeset_items
]
725 def break_all_internal_dependencies(self
, changeset_items
):
726 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
728 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
729 be part of a single changeset. Break this list into sublists,
730 where the CVSRevisions in each sublist are free of mutual
733 # This method is written non-recursively to avoid any possible
734 # problems with recursion depth.
736 changesets_to_split
= [changeset_items
]
737 while changesets_to_split
:
738 changesets
= self
.break_internal_dependencies(changesets_to_split
.pop())
739 if len(changesets
) == 1:
740 [changeset_items
] = changesets
741 yield changeset_items
743 # The changeset had to be split; see if either of the
744 # fragments have to be split:
746 changesets_to_split
.extend(changesets
)
748 def get_changesets(self
):
749 """Generate (Changeset, [CVSItem,...]) for all changesets.
751 The Changesets already have their internal dependencies broken.
752 The [CVSItem,...] list is the list of CVSItems in the
753 corresponding Changeset."""
755 for changeset_items
in self
.get_revision_changesets():
756 for split_changeset_items \
757 in self
.break_all_internal_dependencies(changeset_items
):
760 self
.changeset_key_generator
.gen_id(),
761 [cvs_rev
.id for cvs_rev
in split_changeset_items
]
763 split_changeset_items
,
766 for changeset_items
in self
.get_symbol_changesets():
768 create_symbol_changeset(
769 self
.changeset_key_generator
.gen_id(),
770 changeset_items
[0].symbol
,
771 [cvs_symbol
.id for cvs_symbol
in changeset_items
]
776 def run(self
, run_options
, stats_keeper
):
777 logger
.quiet("Creating preliminary commit sets...")
779 Ctx()._projects
= read_projects(
780 artifact_manager
.get_temp_file(config
.PROJECTS
)
782 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
783 Ctx()._symbol
_db
= SymbolDatabase()
785 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'rb')
786 self
.cvs_item_serializer
= cPickle
.load(f
)
789 changeset_db
= ChangesetDatabase(
790 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
791 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
794 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
795 artifact_manager
.get_temp_file(config
.CVS_ITEM_TO_CHANGESET
),
799 self
.sorted_cvs_items_db
= IndexedCVSItemStore(
800 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
801 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
804 self
.changeset_key_generator
= KeyGenerator()
806 for (changeset
, changeset_items
) in self
.get_changesets():
807 if logger
.is_on(logger
.DEBUG
):
808 logger
.debug(repr(changeset
))
809 changeset_db
.store(changeset
)
810 for cvs_item
in changeset_items
:
811 self
.sorted_cvs_items_db
.add(cvs_item
)
812 cvs_item_to_changeset_id
[cvs_item
.id] = changeset
.id
814 self
.sorted_cvs_items_db
.close()
815 cvs_item_to_changeset_id
.close()
817 Ctx()._symbol
_db
.close()
818 Ctx()._cvs
_path
_db
.close()
820 del self
.cvs_item_serializer
825 class ProcessedChangesetLogger
:
827 self
.processed_changeset_ids
= []
829 def log(self
, changeset_id
):
830 if logger
.is_on(logger
.DEBUG
):
831 self
.processed_changeset_ids
.append(changeset_id
)
834 if self
.processed_changeset_ids
:
836 'Consumed changeset ids %s'
837 % (', '.join(['%x' % id for id in self
.processed_changeset_ids
]),))
839 del self
.processed_changeset_ids
[:]
842 class BreakRevisionChangesetCyclesPass(Pass
):
843 """Break up any dependency cycles involving only RevisionChangesets."""
845 def register_artifacts(self
):
846 self
._register
_temp
_file
(config
.CHANGESETS_REVBROKEN_STORE
)
847 self
._register
_temp
_file
(config
.CHANGESETS_REVBROKEN_INDEX
)
848 self
._register
_temp
_file
(config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
)
849 self
._register
_temp
_file
_needed
(config
.PROJECTS
)
850 self
._register
_temp
_file
_needed
(config
.SYMBOL_DB
)
851 self
._register
_temp
_file
_needed
(config
.CVS_PATHS_DB
)
852 self
._register
_temp
_file
_needed
(config
.CVS_ITEMS_SORTED_STORE
)
853 self
._register
_temp
_file
_needed
(config
.CVS_ITEMS_SORTED_INDEX_TABLE
)
854 self
._register
_temp
_file
_needed
(config
.CHANGESETS_STORE
)
855 self
._register
_temp
_file
_needed
(config
.CHANGESETS_INDEX
)
856 self
._register
_temp
_file
_needed
(config
.CVS_ITEM_TO_CHANGESET
)
858 def get_source_changesets(self
):
859 old_changeset_db
= ChangesetDatabase(
860 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
861 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
864 changeset_ids
= old_changeset_db
.keys()
866 for changeset_id
in changeset_ids
:
867 yield old_changeset_db
[changeset_id
]
869 old_changeset_db
.close()
872 def break_cycle(self
, cycle
):
873 """Break up one or more changesets in CYCLE to help break the cycle.
875 CYCLE is a list of Changesets where
877 cycle[i] depends on cycle[i - 1]
879 Break up one or more changesets in CYCLE to make progress towards
880 breaking the cycle. Update self.changeset_graph accordingly.
882 It is not guaranteed that the cycle will be broken by one call to
883 this routine, but at least some progress must be made."""
885 self
.processed_changeset_logger
.flush()
888 for i
in range(len(cycle
)):
889 # It's OK if this index wraps to -1:
890 link
= ChangesetGraphLink(
891 cycle
[i
- 1], cycle
[i
], cycle
[i
+ 1 - len(cycle
)])
893 if best_i
is None or link
< best_link
:
897 if logger
.is_on(logger
.DEBUG
):
899 'Breaking cycle %s by breaking node %x' % (
900 ' -> '.join(['%x' % node
.id for node
in (cycle
+ [cycle
[0]])]),
901 best_link
.changeset
.id,))
903 new_changesets
= best_link
.break_changeset(self
.changeset_key_generator
)
905 self
.changeset_graph
.delete_changeset(best_link
.changeset
)
907 for changeset
in new_changesets
:
908 self
.changeset_graph
.add_new_changeset(changeset
)
910 def run(self
, run_options
, stats_keeper
):
911 logger
.quiet("Breaking revision changeset dependency cycles...")
913 Ctx()._projects
= read_projects(
914 artifact_manager
.get_temp_file(config
.PROJECTS
)
916 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
917 Ctx()._symbol
_db
= SymbolDatabase()
918 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
919 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
920 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
924 artifact_manager
.get_temp_file(
925 config
.CVS_ITEM_TO_CHANGESET
),
926 artifact_manager
.get_temp_file(
927 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
))
928 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
929 artifact_manager
.get_temp_file(
930 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
),
933 changeset_db
= ChangesetDatabase(
934 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
935 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
938 self
.changeset_graph
= ChangesetGraph(
939 changeset_db
, cvs_item_to_changeset_id
943 for changeset
in self
.get_source_changesets():
944 changeset_db
.store(changeset
)
945 if isinstance(changeset
, RevisionChangeset
):
946 self
.changeset_graph
.add_changeset(changeset
)
947 max_changeset_id
= max(max_changeset_id
, changeset
.id)
949 self
.changeset_key_generator
= KeyGenerator(max_changeset_id
+ 1)
951 self
.processed_changeset_logger
= ProcessedChangesetLogger()
953 # Consume the graph, breaking cycles using self.break_cycle():
954 for (changeset
, time_range
) in self
.changeset_graph
.consume_graph(
955 cycle_breaker
=self
.break_cycle
957 self
.processed_changeset_logger
.log(changeset
.id)
959 self
.processed_changeset_logger
.flush()
960 del self
.processed_changeset_logger
962 self
.changeset_graph
.close()
963 self
.changeset_graph
= None
964 Ctx()._cvs
_items
_db
.close()
965 Ctx()._symbol
_db
.close()
966 Ctx()._cvs
_path
_db
.close()
class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  Also convert them to OrderedChangesets, without changing their ids."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    changeset_ids = changeset_db.keys()

    for changeset_id in changeset_ids:
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate the RevisionChangesets as OrderedChangesets, in commit order.

    Consume the revision changeset graph topologically, then re-emit
    each changeset as an OrderedChangeset that records its ordinal
    position plus the ids of its neighbors in the commit ordering."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN
                ),
            DB_OPEN_READ,
            )
        )

    for changeset in self.get_source_changesets(changeset_db):
      if isinstance(changeset, RevisionChangeset):
        changeset_graph.add_changeset(changeset)

    changeset_ids = []

    # Sentinel so that the first OrderedChangeset has no predecessor:
    changeset_ids.append(None)

    for (changeset, time_range) in changeset_graph.consume_graph():
      changeset_ids.append(changeset.id)

    # Sentinel so that the last OrderedChangeset has no successor:
    changeset_ids.append(None)

    for i in range(1, len(changeset_ids) - 1):
      changeset = changeset_db[changeset_ids[i]]
      yield OrderedChangeset(
          changeset.id, changeset.cvs_item_ids, i - 1,
          changeset_ids[i - 1], changeset_ids[i + 1])

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    changesets_revordered_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW)

    for changeset in self.get_changesets():
      changesets_revordered_db.store(changeset)

    changesets_revordered_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Generate the changesets produced by RevisionTopologicalSortPass."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    # Find the best (cheapest-to-break) link in the cycle:
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed this pass's cvs_item -> changeset table with a copy of the
    # previous pass's table, then open it for in-place updating:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      # Only SymbolChangesets take part in the cycle-breaking graph:
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Generate the changesets produced by BreakSymbolChangesetCyclesPass."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets."""

    logger.debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      # Each individual item must itself be non-retrograde:
      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Partition the items: those whose predecessors reach past the
    # earliest successor must go in the "late" half:
    early_item_ids = []
    late_item_ids = []
    for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(id)
      else:
        early_item_ids.append(id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split the changeset if it is retrograde; return True iff split.

    A changeset is "retrograde" if one of its ordered predecessors has
    an ordinal at least as large as one of its ordered successors."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[id]
        for id in node.pred_ids
        if id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[id]
        for id in node.succ_ids
        if id in self.ordinals
        ]
    succ_ordinals.sort()
    if pred_ordinals and succ_ordinals \
           and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    # Find the best (cheapest-to-break) link in the interior of the
    # segment:
    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    .  Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle.  Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed this pass's cvs_item -> changeset table with a copy of the
    # previous pass's table, then open it for in-place updating:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      id = ordered_changeset_map[ordinal]
      ordered_changesets.append(id)

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
      if path:
        if logger.is_on(logger.DEBUG):
          logger.debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if logger.is_on(logger.DEBUG):
          logger.debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    logger.quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    # Output of this pass:
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            )
        )
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    # Record each changeset id and its commit timestamp as hex text:
    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    # Each line of the datafile is '<changeset_id> <timestamp>', both
    # written as hexadecimal (see TopologicalSortPass.run):
    for line in file(
            artifact_manager.get_temp_file(
                config.CHANGESETS_SORTED_DATAFILE)):
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    logger.normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        logger.verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    logger.quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class SortSymbolOpeningsClosingsPass(Pass):
  """This pass was formerly known as pass6."""

  def register_artifacts(self):
    # Output of this pass:
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    # Input required from CreateRevsPass:
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting symbolic name source revisions...")

    def sort_key(line):
      # Sort by (symbol id, svn revnum, rest-of-line); the first two
      # fields are written as hex / decimal text:
      line = line.split(' ', 2)
      return (int(line[0], 16), int(line[1]), line[2],)

    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED
            ),
        key=sort_key,
        tempdirs=[Ctx().tmpdir],
        )

    logger.quiet("Done")
class IndexSymbolsPass(Pass):
  """This pass was formerly known as pass7."""

  def register_artifacts(self):
    # Output of this pass:
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    # Map { symbol id : byte offset of first line mentioning it }:
    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    old_id = None
    while True:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
      # Each line starts '<id-in-hex> <svn_revnum> ...' (format
      # written by CreateRevsPass via SymbolingsLogger):
      id, svn_revnum, ignored = line.split(" ", 2)
      id = int(id, 16)
      if id != old_id:
        logger.verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
        old_id = id
        offsets[id] = fpos

    f.close()

    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    cPickle.dump(offsets, offsets_db, -1)
    offsets_db.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    logger.quiet("Done.")
class OutputPass(Pass):
  """This pass was formerly known as pass8."""

  def register_artifacts(self):
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    # The output option (e.g. dumpfile/repository writer) registers
    # its own artifacts:
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # Emit the SVN commits one revision at a time, in revision-number
    # order, until get_svn_commit() returns a falsy value:
    svn_revnum = 1
    svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
    while svn_commit:
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_path_db.close()
1776 # The list of passes constituting a run of cvs2svn:
1779 CleanMetadataPass(),
1780 CollateSymbolsPass(),
1781 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1782 FilterSymbolsPass(),
1783 SortRevisionsPass(),
1785 InitializeChangesetsPass(),
1786 #CheckIndexedItemStoreDependenciesPass(
1787 # config.CVS_ITEMS_SORTED_STORE,
1788 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1789 BreakRevisionChangesetCyclesPass(),
1790 RevisionTopologicalSortPass(),
1791 BreakSymbolChangesetCyclesPass(),
1792 BreakAllChangesetCyclesPass(),
1793 TopologicalSortPass(),
1795 SortSymbolOpeningsClosingsPass(),