1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
24 from cvs2svn_lib
import config
25 from cvs2svn_lib
.context
import Ctx
26 from cvs2svn_lib
.common
import warning_prefix
27 from cvs2svn_lib
.common
import FatalException
28 from cvs2svn_lib
.common
import FatalError
29 from cvs2svn_lib
.common
import InternalError
30 from cvs2svn_lib
.common
import DB_OPEN_NEW
31 from cvs2svn_lib
.common
import DB_OPEN_READ
32 from cvs2svn_lib
.common
import DB_OPEN_WRITE
33 from cvs2svn_lib
.common
import Timestamper
34 from cvs2svn_lib
.sort
import sort_file
35 from cvs2svn_lib
.log
import logger
36 from cvs2svn_lib
.pass_manager
import Pass
37 from cvs2svn_lib
.serializer
import PrimedPickleSerializer
38 from cvs2svn_lib
.artifact_manager
import artifact_manager
39 from cvs2svn_lib
.cvs_path_database
import CVSPathDatabase
40 from cvs2svn_lib
.metadata_database
import MetadataDatabase
41 from cvs2svn_lib
.project
import read_projects
42 from cvs2svn_lib
.project
import write_projects
43 from cvs2svn_lib
.symbol
import LineOfDevelopment
44 from cvs2svn_lib
.symbol
import Trunk
45 from cvs2svn_lib
.symbol
import Symbol
46 from cvs2svn_lib
.symbol
import Branch
47 from cvs2svn_lib
.symbol
import Tag
48 from cvs2svn_lib
.symbol
import ExcludedSymbol
49 from cvs2svn_lib
.symbol_database
import SymbolDatabase
50 from cvs2svn_lib
.symbol_database
import create_symbol_database
51 from cvs2svn_lib
.symbol_statistics
import SymbolPlanError
52 from cvs2svn_lib
.symbol_statistics
import IndeterminateSymbolException
53 from cvs2svn_lib
.symbol_statistics
import SymbolStatistics
54 from cvs2svn_lib
.cvs_item
import CVSRevision
55 from cvs2svn_lib
.cvs_item
import CVSSymbol
56 from cvs2svn_lib
.cvs_item_database
import OldCVSItemStore
57 from cvs2svn_lib
.cvs_item_database
import IndexedCVSItemStore
58 from cvs2svn_lib
.cvs_item_database
import cvs_item_primer
59 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib
.key_generator
import KeyGenerator
64 from cvs2svn_lib
.changeset
import RevisionChangeset
65 from cvs2svn_lib
.changeset
import OrderedChangeset
66 from cvs2svn_lib
.changeset
import SymbolChangeset
67 from cvs2svn_lib
.changeset
import BranchChangeset
68 from cvs2svn_lib
.changeset
import create_symbol_changeset
69 from cvs2svn_lib
.changeset_graph
import ChangesetGraph
70 from cvs2svn_lib
.changeset_graph_link
import ChangesetGraphLink
71 from cvs2svn_lib
.changeset_database
import ChangesetDatabase
72 from cvs2svn_lib
.changeset_database
import CVSItemToChangesetTable
73 from cvs2svn_lib
.svn_commit
import SVNRevisionCommit
74 from cvs2svn_lib
.openings_closings
import SymbolingsLogger
75 from cvs2svn_lib
.svn_commit_creator
import SVNCommitCreator
76 from cvs2svn_lib
.persistence_manager
import PersistenceManager
77 from cvs2svn_lib
.repository_walker
import walk_repository
78 from cvs2svn_lib
.collect_data
import CollectData
79 from cvs2svn_lib
.check_dependencies_pass \
80 import CheckItemStoreDependenciesPass
81 from cvs2svn_lib
.check_dependencies_pass \
82 import CheckIndexedItemStoreDependenciesPass
85 class CollectRevsPass(Pass
):
86 """This pass was formerly known as pass1."""
def register_artifacts(self):
    """Declare the temporary files that this pass creates.

    Every artifact listed here is a new output of pass 1; this pass
    consumes no artifacts from earlier passes.
    """
    for artifact_id in (
            config.PROJECTS,
            config.SYMBOL_STATISTICS,
            config.METADATA_INDEX_TABLE,
            config.METADATA_STORE,
            config.CVS_PATHS_DB,
            config.CVS_ITEMS_STORE,
            ):
        self._register_temp_file(artifact_id)
96 def run(self
, run_options
, stats_keeper
):
97 logger
.quiet("Examining all CVS ',v' files...")
99 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_NEW
)
100 cd
= CollectData(stats_keeper
)
102 # Key generator for CVSFiles:
103 file_key_generator
= KeyGenerator()
105 for project
in run_options
.projects
:
106 Ctx()._projects
[project
.id] = project
109 walk_repository(project
, file_key_generator
, cd
.record_fatal_error
),
111 run_options
.projects
= None
113 fatal_errors
= cd
.close()
116 raise FatalException("Pass 1 complete.\n"
119 + "\n".join(fatal_errors
) + "\n"
120 + "Exited due to fatal error(s).")
122 Ctx()._cvs
_path
_db
.close()
123 write_projects(artifact_manager
.get_temp_file(config
.PROJECTS
))
127 class CleanMetadataPass(Pass
):
128 """Clean up CVS revision metadata and write it to a new database."""
def register_artifacts(self):
    """Declare this pass's output artifacts and input dependencies.

    Outputs are the cleaned metadata store/index; inputs are the raw
    metadata store/index produced by an earlier pass.
    """
    for produced in (
            config.METADATA_CLEAN_INDEX_TABLE,
            config.METADATA_CLEAN_STORE,
            ):
        self._register_temp_file(produced)
    for consumed in (
            config.METADATA_INDEX_TABLE,
            config.METADATA_STORE,
            ):
        self._register_temp_file_needed(consumed)
136 def _get_clean_author(self
, author
):
137 """Return AUTHOR, converted appropriately to UTF8.
139 Raise a UnicodeException if it cannot be converted using the
140 configured cvs_author_decoder."""
143 return self
._authors
[author
]
148 clean_author
= Ctx().cvs_author_decoder(author
)
150 self
._authors
[author
] = author
151 raise UnicodeError('Problem decoding author \'%s\'' % (author
,))
154 clean_author
= clean_author
.encode('utf8')
156 self
._authors
[author
] = author
157 raise UnicodeError('Problem encoding author \'%s\'' % (author
,))
159 self
._authors
[author
] = clean_author
162 def _get_clean_log_msg(self
, log_msg
):
163 """Return LOG_MSG, converted appropriately to UTF8.
165 Raise a UnicodeException if it cannot be converted using the
166 configured cvs_log_decoder."""
169 clean_log_msg
= Ctx().cvs_log_decoder(log_msg
)
172 'Problem decoding log message:\n'
176 % ('-' * 75, log_msg
, '-' * 75,)
180 return clean_log_msg
.encode('utf8')
183 'Problem encoding log message:\n'
187 % ('-' * 75, log_msg
, '-' * 75,)
190 def _clean_metadata(self
, metadata
):
191 """Clean up METADATA by overwriting its members as necessary."""
194 metadata
.author
= self
._get
_clean
_author
(metadata
.author
)
195 except UnicodeError, e
:
196 logger
.warn('%s: %s' % (warning_prefix
, e
,))
200 metadata
.log_msg
= self
._get
_clean
_log
_msg
(metadata
.log_msg
)
201 except UnicodeError, e
:
202 logger
.warn('%s: %s' % (warning_prefix
, e
,))
205 def run(self
, run_options
, stats_keeper
):
206 logger
.quiet("Converting metadata to UTF8...")
207 metadata_db
= MetadataDatabase(
208 artifact_manager
.get_temp_file(config
.METADATA_STORE
),
209 artifact_manager
.get_temp_file(config
.METADATA_INDEX_TABLE
),
212 metadata_clean_db
= MetadataDatabase(
213 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
214 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
218 self
.warnings
= False
220 # A map {author : clean_author} for those known (to avoid
221 # repeating warnings):
224 for id in metadata_db
.iterkeys():
225 metadata
= metadata_db
[id]
227 # Record the original author name because it might be needed for
228 # expanding CVS keywords:
229 metadata
.original_author
= metadata
.author
231 self
._clean
_metadata
(metadata
)
233 metadata_clean_db
[id] = metadata
237 'There were warnings converting author names and/or log messages\n'
238 'to Unicode (see messages above). Please restart this pass\n'
239 'with one or more \'--encoding\' parameters or with\n'
240 '\'--fallback-encoding\'.'
243 metadata_clean_db
.close()
248 class CollateSymbolsPass(Pass
):
249 """Divide symbols into branches, tags, and excludes."""
255 ExcludedSymbol
: 'exclude',
def register_artifacts(self):
    """Declare the symbol database as output and its inputs as needed.

    Creates the symbol database; requires the projects file and the
    symbol statistics written by earlier passes.
    """
    self._register_temp_file(config.SYMBOL_DB)
    for needed in (config.PROJECTS, config.SYMBOL_STATISTICS):
        self._register_temp_file_needed(needed)
264 def get_symbol(self
, run_options
, stats
):
265 """Use StrategyRules to decide what to do with a symbol.
267 STATS is an instance of symbol_statistics._Stats describing an
268 instance of Symbol or Trunk. To determine how the symbol is to be
269 converted, consult the StrategyRules in the project's
270 symbol_strategy_rules. Each rule is allowed a chance to change
271 the way the symbol will be converted. If the symbol is not a
272 Trunk or TypedSymbol after all rules have run, raise
273 IndeterminateSymbolException."""
276 rules
= run_options
.project_symbol_strategy_rules
[symbol
.project
.id]
278 symbol
= rule
.get_symbol(symbol
, stats
)
279 assert symbol
is not None
281 stats
.check_valid(symbol
)
285 def log_symbol_summary(self
, stats
, symbol
):
286 if not self
.symbol_info_file
:
289 if isinstance(symbol
, Trunk
):
291 preferred_parent_name
= '.'
293 name
= stats
.lod
.name
294 if symbol
.preferred_parent_id
is None:
295 preferred_parent_name
= '.'
297 preferred_parent
= self
.symbol_stats
[symbol
.preferred_parent_id
].lod
298 if isinstance(preferred_parent
, Trunk
):
299 preferred_parent_name
= '.trunk.'
301 preferred_parent_name
= preferred_parent
.name
303 if isinstance(symbol
, LineOfDevelopment
) and symbol
.base_path
:
304 symbol_path
= symbol
.base_path
308 self
.symbol_info_file
.write(
309 '%-5d %-30s %-10s %s %s\n' % (
310 stats
.lod
.project
.id,
312 self
.conversion_names
[symbol
.__class
__],
314 preferred_parent_name
,
317 self
.symbol_info_file
.write(' # %s\n' % (stats
,))
318 parent_counts
= stats
.possible_parents
.items()
320 self
.symbol_info_file
.write(' # Possible parents:\n')
321 parent_counts
.sort(lambda a
,b
: cmp((b
[1], a
[0]), (a
[1], b
[0])))
322 for (pp
, count
) in parent_counts
:
323 if isinstance(pp
, Trunk
):
324 self
.symbol_info_file
.write(
325 ' # .trunk. : %d\n' % (count
,)
328 self
.symbol_info_file
.write(
329 ' # %s : %d\n' % (pp
.name
, count
,)
332 def get_symbols(self
, run_options
):
333 """Return a map telling how to convert symbols.
335 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
336 indicating how each symbol should be converted. Trunk objects in
337 SYMBOL_STATS are passed through unchanged. One object is included
338 in the return value for each line of development described in
341 Raise FatalError if there was an error."""
346 if Ctx().symbol_info_filename
is not None:
347 self
.symbol_info_file
= open(Ctx().symbol_info_filename
, 'w')
348 self
.symbol_info_file
.write(
349 '# Columns: project_id symbol_name conversion symbol_path '
350 'preferred_parent_name\n'
353 self
.symbol_info_file
= None
355 # Initialize each symbol strategy rule a single time, even if it
356 # is used in more than one project. First define a map from
357 # object id to symbol strategy rule:
359 for rule_list
in run_options
.project_symbol_strategy_rules
:
360 for rule
in rule_list
:
361 rules
[id(rule
)] = rule
363 for rule
in rules
.itervalues():
364 rule
.start(self
.symbol_stats
)
368 for stats
in self
.symbol_stats
:
370 symbol
= self
.get_symbol(run_options
, stats
)
371 except IndeterminateSymbolException
, e
:
372 self
.log_symbol_summary(stats
, stats
.lod
)
373 mismatches
.append(e
.stats
)
374 except SymbolPlanError
, e
:
375 self
.log_symbol_summary(stats
, stats
.lod
)
378 self
.log_symbol_summary(stats
, symbol
)
379 retval
[stats
.lod
] = symbol
381 for rule
in rules
.itervalues():
384 if self
.symbol_info_file
:
385 self
.symbol_info_file
.close()
387 del self
.symbol_info_file
389 if errors
or mismatches
:
390 s
= ['Problems determining how symbols should be converted:\n']
392 s
.append('%s\n' % (e
,))
395 'It is not clear how the following symbols '
396 'should be converted.\n'
397 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
399 '--symbol-default to resolve the ambiguity.\n'
401 for stats
in mismatches
:
402 s
.append(' %s\n' % (stats
,))
403 raise FatalError(''.join(s
))
407 def run(self
, run_options
, stats_keeper
):
408 Ctx()._projects
= read_projects(
409 artifact_manager
.get_temp_file(config
.PROJECTS
)
411 self
.symbol_stats
= SymbolStatistics(
412 artifact_manager
.get_temp_file(config
.SYMBOL_STATISTICS
)
415 symbol_map
= self
.get_symbols(run_options
)
417 # Check the symbols for consistency and bail out if there were errors:
418 self
.symbol_stats
.check_consistency(symbol_map
)
420 # Check that the symbols all have SVN paths set and that the paths
422 Ctx().output_option
.check_symbols(symbol_map
)
424 for symbol
in symbol_map
.itervalues():
425 if isinstance(symbol
, ExcludedSymbol
):
426 self
.symbol_stats
.exclude_symbol(symbol
)
428 create_symbol_database(symbol_map
.values())
430 del self
.symbol_stats
435 class FilterSymbolsPass(Pass
):
436 """Delete any branches/tags that are to be excluded.
438 Also delete revisions on excluded branches, and delete other
439 references to the excluded symbols."""
def register_artifacts(self):
    """Declare this pass's artifacts, then let the revision collector add its own.

    Outputs: the pickled item serializer plus the unsorted revision and
    symbol summary datafiles.  Inputs: the project, symbol, metadata,
    path, and CVS-item databases from earlier passes.
    """
    for produced in (
            config.ITEM_SERIALIZER,
            config.CVS_REVS_DATAFILE,
            config.CVS_SYMBOLS_DATAFILE,
            ):
        self._register_temp_file(produced)
    for consumed in (
            config.PROJECTS,
            config.SYMBOL_DB,
            config.METADATA_CLEAN_STORE,
            config.METADATA_CLEAN_INDEX_TABLE,
            config.CVS_PATHS_DB,
            config.CVS_ITEMS_STORE,
            ):
        self._register_temp_file_needed(consumed)
    # The configured revision collector may need artifacts of its own:
    Ctx().revision_collector.register_artifacts(self)
453 def run(self
, run_options
, stats_keeper
):
454 Ctx()._projects
= read_projects(
455 artifact_manager
.get_temp_file(config
.PROJECTS
)
457 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
458 Ctx()._metadata
_db
= MetadataDatabase(
459 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
460 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
463 Ctx()._symbol
_db
= SymbolDatabase()
464 cvs_item_store
= OldCVSItemStore(
465 artifact_manager
.get_temp_file(config
.CVS_ITEMS_STORE
))
467 cvs_item_serializer
= PrimedPickleSerializer(cvs_item_primer
)
468 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'wb')
469 cPickle
.dump(cvs_item_serializer
, f
, -1)
472 rev_db
= NewSortableCVSRevisionDatabase(
473 artifact_manager
.get_temp_file(config
.CVS_REVS_DATAFILE
),
477 symbol_db
= NewSortableCVSSymbolDatabase(
478 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_DATAFILE
),
482 revision_collector
= Ctx().revision_collector
484 logger
.quiet("Filtering out excluded symbols and summarizing items...")
486 stats_keeper
.reset_cvs_rev_info()
487 revision_collector
.start()
489 # Process the cvs items store one file at a time:
490 for cvs_file_items
in cvs_item_store
.iter_cvs_file_items():
491 logger
.verbose(cvs_file_items
.cvs_file
.rcs_path
)
492 cvs_file_items
.filter_excluded_symbols()
493 cvs_file_items
.mutate_symbols()
494 cvs_file_items
.adjust_parents()
495 cvs_file_items
.refine_symbols()
496 cvs_file_items
.determine_revision_properties(
497 Ctx().revision_property_setters
499 cvs_file_items
.record_opened_symbols()
500 cvs_file_items
.record_closed_symbols()
501 cvs_file_items
.check_link_consistency()
503 # Give the revision collector a chance to collect data about the
505 revision_collector
.process_file(cvs_file_items
)
507 # Store whatever is left to the new file and update statistics:
508 stats_keeper
.record_cvs_file(cvs_file_items
.cvs_file
)
509 for cvs_item
in cvs_file_items
.values():
510 stats_keeper
.record_cvs_item(cvs_item
)
512 if isinstance(cvs_item
, CVSRevision
):
514 elif isinstance(cvs_item
, CVSSymbol
):
515 symbol_db
.add(cvs_item
)
517 stats_keeper
.set_stats_reflect_exclude(True)
521 revision_collector
.finish()
522 cvs_item_store
.close()
523 Ctx()._symbol
_db
.close()
524 Ctx()._cvs
_path
_db
.close()
529 class SortRevisionsPass(Pass
):
530 """Sort the revisions file."""
def register_artifacts(self):
    """Declare the sorted revision datafile as output; the unsorted one as input."""
    self._register_temp_file(config.CVS_REVS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_DATAFILE)
536 def run(self
, run_options
, stats_keeper
):
537 logger
.quiet("Sorting CVS revision summaries...")
539 artifact_manager
.get_temp_file(config
.CVS_REVS_DATAFILE
),
540 artifact_manager
.get_temp_file(
541 config
.CVS_REVS_SORTED_DATAFILE
543 tempdirs
=[Ctx().tmpdir
],
548 class SortSymbolsPass(Pass
):
549 """Sort the symbols file."""
def register_artifacts(self):
    """Declare the sorted symbol datafile as output; the unsorted one as input."""
    self._register_temp_file(config.CVS_SYMBOLS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_DATAFILE)
555 def run(self
, run_options
, stats_keeper
):
556 logger
.quiet("Sorting CVS symbol summaries...")
558 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_DATAFILE
),
559 artifact_manager
.get_temp_file(
560 config
.CVS_SYMBOLS_SORTED_DATAFILE
562 tempdirs
=[Ctx().tmpdir
],
567 class InitializeChangesetsPass(Pass
):
568 """Create preliminary CommitSets."""
def register_artifacts(self):
    """Declare the changeset stores produced here and the inputs they need.

    Outputs: the item-to-changeset map, the changeset store/index, and
    the sorted CVS-item store/index.  Inputs: project, symbol, and path
    databases, the item serializer, and both sorted summary datafiles.
    """
    for produced in (
            config.CVS_ITEM_TO_CHANGESET,
            config.CHANGESETS_STORE,
            config.CHANGESETS_INDEX,
            config.CVS_ITEMS_SORTED_STORE,
            config.CVS_ITEMS_SORTED_INDEX_TABLE,
            ):
        self._register_temp_file(produced)
    for consumed in (
            config.PROJECTS,
            config.SYMBOL_DB,
            config.CVS_PATHS_DB,
            config.ITEM_SERIALIZER,
            config.CVS_REVS_SORTED_DATAFILE,
            config.CVS_SYMBOLS_SORTED_DATAFILE,
            ):
        self._register_temp_file_needed(consumed)
584 def get_revision_changesets(self
):
585 """Generate revision changesets, one at a time.
587 Each time, yield a list of CVSRevisions that might potentially
588 consititute a changeset."""
590 # Create changesets for CVSRevisions:
591 old_metadata_id
= None
595 db
= OldSortableCVSRevisionDatabase(
596 artifact_manager
.get_temp_file(
597 config
.CVS_REVS_SORTED_DATAFILE
599 self
.cvs_item_serializer
,
603 if cvs_rev
.metadata_id
!= old_metadata_id \
604 or cvs_rev
.timestamp
> old_timestamp
+ config
.COMMIT_THRESHOLD
:
605 # Start a new changeset. First finish up the old changeset,
608 yield changeset_items
610 old_metadata_id
= cvs_rev
.metadata_id
611 changeset_items
.append(cvs_rev
)
612 old_timestamp
= cvs_rev
.timestamp
614 # Finish up the last changeset, if any:
616 yield changeset_items
618 def get_symbol_changesets(self
):
619 """Generate symbol changesets, one at a time.
621 Each time, yield a list of CVSSymbols that might potentially
622 consititute a changeset."""
627 db
= OldSortableCVSSymbolDatabase(
628 artifact_manager
.get_temp_file(
629 config
.CVS_SYMBOLS_SORTED_DATAFILE
631 self
.cvs_item_serializer
,
634 for cvs_symbol
in db
:
635 if cvs_symbol
.symbol
.id != old_symbol_id
:
636 # Start a new changeset. First finish up the old changeset,
639 yield changeset_items
641 old_symbol_id
= cvs_symbol
.symbol
.id
642 changeset_items
.append(cvs_symbol
)
644 # Finish up the last changeset, if any:
646 yield changeset_items
649 def compare_items(a
, b
):
651 cmp(a
.timestamp
, b
.timestamp
)
652 or cmp(a
.cvs_file
.cvs_path
, b
.cvs_file
.cvs_path
)
653 or cmp([int(x
) for x
in a
.rev
.split('.')],
654 [int(x
) for x
in b
.rev
.split('.')])
657 def break_internal_dependencies(self
, changeset_items
):
658 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
660 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
661 belong in a single RevisionChangeset, but there might be internal
662 dependencies among the items. Return a list of lists, where each
663 sublist is a list of CVSRevisions and at least one internal
664 dependency has been eliminated. Iff CHANGESET_ITEMS does not have
665 to be split, then the return value will contain a single value,
666 namely the original value of CHANGESET_ITEMS. Split
667 CHANGESET_ITEMS at most once, even though the resulting changesets
668 might themselves have internal dependencies."""
670 # We only look for succ dependencies, since by doing so we
671 # automatically cover pred dependencies as well. First create a
672 # list of tuples (pred, succ) of id pairs for CVSItems that depend
675 changeset_cvs_item_ids
= set([cvs_rev
.id for cvs_rev
in changeset_items
])
676 for cvs_item
in changeset_items
:
677 for next_id
in cvs_item
.get_succ_ids():
678 if next_id
in changeset_cvs_item_ids
:
679 # Sanity check: a CVSItem should never depend on itself:
680 if next_id
== cvs_item
.id:
681 raise InternalError('Item depends on itself: %s' % (cvs_item
,))
683 dependencies
.append((cvs_item
.id, next_id
,))
686 # Sort the changeset_items in a defined order (chronological to the
687 # extent that the timestamps are correct and unique).
688 changeset_items
.sort(self
.compare_items
)
690 for (i
, changeset_item
) in enumerate(changeset_items
):
691 indexes
[changeset_item
.id] = i
692 # How many internal dependencies would be broken by breaking the
693 # Changeset after a particular index?
694 breaks
= [0] * len(changeset_items
)
695 for (pred
, succ
,) in dependencies
:
696 pred_index
= indexes
[pred
]
697 succ_index
= indexes
[succ
]
698 breaks
[min(pred_index
, succ_index
)] += 1
699 breaks
[max(pred_index
, succ_index
)] -= 1
703 for i
in range(1, len(breaks
)):
704 breaks
[i
] += breaks
[i
- 1]
705 for i
in range(0, len(breaks
) - 1):
706 if breaks
[i
] > best_count
:
708 best_count
= breaks
[i
]
709 best_time
= (changeset_items
[i
+ 1].timestamp
710 - changeset_items
[i
].timestamp
)
711 elif breaks
[i
] == best_count \
712 and (changeset_items
[i
+ 1].timestamp
713 - changeset_items
[i
].timestamp
) < best_time
:
715 best_count
= breaks
[i
]
716 best_time
= (changeset_items
[i
+ 1].timestamp
717 - changeset_items
[i
].timestamp
)
718 # Reuse the old changeset.id for the first of the split changesets.
719 return [changeset_items
[:best_i
+ 1], changeset_items
[best_i
+ 1:]]
721 return [changeset_items
]
723 def break_all_internal_dependencies(self
, changeset_items
):
724 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
726 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
727 be part of a single changeset. Break this list into sublists,
728 where the CVSRevisions in each sublist are free of mutual
731 # This method is written non-recursively to avoid any possible
732 # problems with recursion depth.
734 changesets_to_split
= [changeset_items
]
735 while changesets_to_split
:
736 changesets
= self
.break_internal_dependencies(changesets_to_split
.pop())
737 if len(changesets
) == 1:
738 [changeset_items
] = changesets
739 yield changeset_items
741 # The changeset had to be split; see if either of the
742 # fragments have to be split:
744 changesets_to_split
.extend(changesets
)
746 def get_changesets(self
):
747 """Generate (Changeset, [CVSItem,...]) for all changesets.
749 The Changesets already have their internal dependencies broken.
750 The [CVSItem,...] list is the list of CVSItems in the
751 corresponding Changeset."""
753 for changeset_items
in self
.get_revision_changesets():
754 for split_changeset_items \
755 in self
.break_all_internal_dependencies(changeset_items
):
758 self
.changeset_key_generator
.gen_id(),
759 [cvs_rev
.id for cvs_rev
in split_changeset_items
]
761 split_changeset_items
,
764 for changeset_items
in self
.get_symbol_changesets():
766 create_symbol_changeset(
767 self
.changeset_key_generator
.gen_id(),
768 changeset_items
[0].symbol
,
769 [cvs_symbol
.id for cvs_symbol
in changeset_items
]
774 def run(self
, run_options
, stats_keeper
):
775 logger
.quiet("Creating preliminary commit sets...")
777 Ctx()._projects
= read_projects(
778 artifact_manager
.get_temp_file(config
.PROJECTS
)
780 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
781 Ctx()._symbol
_db
= SymbolDatabase()
783 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'rb')
784 self
.cvs_item_serializer
= cPickle
.load(f
)
787 changeset_db
= ChangesetDatabase(
788 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
789 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
792 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
793 artifact_manager
.get_temp_file(config
.CVS_ITEM_TO_CHANGESET
),
797 self
.sorted_cvs_items_db
= IndexedCVSItemStore(
798 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
799 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
802 self
.changeset_key_generator
= KeyGenerator()
804 for (changeset
, changeset_items
) in self
.get_changesets():
805 if logger
.is_on(logger
.DEBUG
):
806 logger
.debug(repr(changeset
))
807 changeset_db
.store(changeset
)
808 for cvs_item
in changeset_items
:
809 self
.sorted_cvs_items_db
.add(cvs_item
)
810 cvs_item_to_changeset_id
[cvs_item
.id] = changeset
.id
812 self
.sorted_cvs_items_db
.close()
813 cvs_item_to_changeset_id
.close()
815 Ctx()._symbol
_db
.close()
816 Ctx()._cvs
_path
_db
.close()
818 del self
.cvs_item_serializer
823 class ProcessedChangesetLogger
:
825 self
.processed_changeset_ids
= []
827 def log(self
, changeset_id
):
828 if logger
.is_on(logger
.DEBUG
):
829 self
.processed_changeset_ids
.append(changeset_id
)
832 if self
.processed_changeset_ids
:
834 'Consumed changeset ids %s'
835 % (', '.join(['%x' % id for id in self
.processed_changeset_ids
]),))
837 del self
.processed_changeset_ids
[:]
840 class BreakRevisionChangesetCyclesPass(Pass
):
841 """Break up any dependency cycles involving only RevisionChangesets."""
def register_artifacts(self):
    """Declare the REVBROKEN changeset artifacts as outputs and their sources as inputs.

    Outputs: the revision-cycle-broken changeset store/index and the
    corresponding item-to-changeset table.  Inputs: the project, symbol,
    path, and sorted item databases plus the original changeset store,
    index, and item-to-changeset table.
    """
    for produced in (
            config.CHANGESETS_REVBROKEN_STORE,
            config.CHANGESETS_REVBROKEN_INDEX,
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
            ):
        self._register_temp_file(produced)
    for consumed in (
            config.PROJECTS,
            config.SYMBOL_DB,
            config.CVS_PATHS_DB,
            config.CVS_ITEMS_SORTED_STORE,
            config.CVS_ITEMS_SORTED_INDEX_TABLE,
            config.CHANGESETS_STORE,
            config.CHANGESETS_INDEX,
            config.CVS_ITEM_TO_CHANGESET,
            ):
        self._register_temp_file_needed(consumed)
856 def get_source_changesets(self
):
857 old_changeset_db
= ChangesetDatabase(
858 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
859 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
862 changeset_ids
= old_changeset_db
.keys()
864 for changeset_id
in changeset_ids
:
865 yield old_changeset_db
[changeset_id
]
867 old_changeset_db
.close()
870 def break_cycle(self
, cycle
):
871 """Break up one or more changesets in CYCLE to help break the cycle.
873 CYCLE is a list of Changesets where
875 cycle[i] depends on cycle[i - 1]
877 Break up one or more changesets in CYCLE to make progress towards
878 breaking the cycle. Update self.changeset_graph accordingly.
880 It is not guaranteed that the cycle will be broken by one call to
881 this routine, but at least some progress must be made."""
883 self
.processed_changeset_logger
.flush()
886 for i
in range(len(cycle
)):
887 # It's OK if this index wraps to -1:
888 link
= ChangesetGraphLink(
889 cycle
[i
- 1], cycle
[i
], cycle
[i
+ 1 - len(cycle
)])
891 if best_i
is None or link
< best_link
:
895 if logger
.is_on(logger
.DEBUG
):
897 'Breaking cycle %s by breaking node %x' % (
898 ' -> '.join(['%x' % node
.id for node
in (cycle
+ [cycle
[0]])]),
899 best_link
.changeset
.id,))
901 new_changesets
= best_link
.break_changeset(self
.changeset_key_generator
)
903 self
.changeset_graph
.delete_changeset(best_link
.changeset
)
905 for changeset
in new_changesets
:
906 self
.changeset_graph
.add_new_changeset(changeset
)
908 def run(self
, run_options
, stats_keeper
):
909 logger
.quiet("Breaking revision changeset dependency cycles...")
911 Ctx()._projects
= read_projects(
912 artifact_manager
.get_temp_file(config
.PROJECTS
)
914 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
915 Ctx()._symbol
_db
= SymbolDatabase()
916 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
917 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
918 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
922 artifact_manager
.get_temp_file(
923 config
.CVS_ITEM_TO_CHANGESET
),
924 artifact_manager
.get_temp_file(
925 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
))
926 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
927 artifact_manager
.get_temp_file(
928 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
),
931 changeset_db
= ChangesetDatabase(
932 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
933 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
936 self
.changeset_graph
= ChangesetGraph(
937 changeset_db
, cvs_item_to_changeset_id
941 for changeset
in self
.get_source_changesets():
942 changeset_db
.store(changeset
)
943 if isinstance(changeset
, RevisionChangeset
):
944 self
.changeset_graph
.add_changeset(changeset
)
945 max_changeset_id
= max(max_changeset_id
, changeset
.id)
947 self
.changeset_key_generator
= KeyGenerator(max_changeset_id
+ 1)
949 self
.processed_changeset_logger
= ProcessedChangesetLogger()
951 # Consume the graph, breaking cycles using self.break_cycle():
952 for (changeset
, time_range
) in self
.changeset_graph
.consume_graph(
953 cycle_breaker
=self
.break_cycle
955 self
.processed_changeset_logger
.log(changeset
.id)
957 self
.processed_changeset_logger
.flush()
958 del self
.processed_changeset_logger
960 self
.changeset_graph
.close()
961 self
.changeset_graph
= None
962 Ctx()._cvs
_items
_db
.close()
963 Ctx()._symbol
_db
.close()
964 Ctx()._cvs
_path
_db
.close()
969 class RevisionTopologicalSortPass(Pass
):
970 """Sort RevisionChangesets into commit order.
972 Also convert them to OrderedChangesets, without changing their ids."""
def register_artifacts(self):
    """Declare the REVSORTED changeset store/index as outputs and their inputs.

    Outputs: the topologically-sorted changeset store and index.
    Inputs: the project, symbol, path, and sorted item databases plus
    the revision-cycle-broken changeset artifacts.
    """
    for produced in (
            config.CHANGESETS_REVSORTED_STORE,
            config.CHANGESETS_REVSORTED_INDEX,
            ):
        self._register_temp_file(produced)
    for consumed in (
            config.PROJECTS,
            config.SYMBOL_DB,
            config.CVS_PATHS_DB,
            config.CVS_ITEMS_SORTED_STORE,
            config.CVS_ITEMS_SORTED_INDEX_TABLE,
            config.CHANGESETS_REVBROKEN_STORE,
            config.CHANGESETS_REVBROKEN_INDEX,
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
            ):
        self._register_temp_file_needed(consumed)
def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB.

    The key list is snapshotted via keys() before iteration begins, so
    mutations of CHANGESET_DB made while this generator runs do not
    affect which keys are visited.
    """
    for changeset_id in list(changeset_db.keys()):
        yield changeset_db[changeset_id]
992 def get_changesets(self
):
993 changeset_db
= ChangesetDatabase(
994 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
995 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
999 changeset_graph
= ChangesetGraph(
1001 CVSItemToChangesetTable(
1002 artifact_manager
.get_temp_file(
1003 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
1009 for changeset
in self
.get_source_changesets(changeset_db
):
1010 if isinstance(changeset
, RevisionChangeset
):
1011 changeset_graph
.add_changeset(changeset
)
1018 changeset_ids
.append(None)
1020 for (changeset
, time_range
) in changeset_graph
.consume_graph():
1021 changeset_ids
.append(changeset
.id)
1024 changeset_ids
.append(None)
1026 for i
in range(1, len(changeset_ids
) - 1):
1027 changeset
= changeset_db
[changeset_ids
[i
]]
1028 yield OrderedChangeset(
1029 changeset
.id, changeset
.cvs_item_ids
, i
- 1,
1030 changeset_ids
[i
- 1], changeset_ids
[i
+ 1])
1032 changeset_graph
.close()
1034 def run(self
, run_options
, stats_keeper
):
1035 logger
.quiet("Generating CVSRevisions in commit order...")
1037 Ctx()._projects
= read_projects(
1038 artifact_manager
.get_temp_file(config
.PROJECTS
)
1040 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
1041 Ctx()._symbol
_db
= SymbolDatabase()
1042 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1043 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1044 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1047 changesets_revordered_db
= ChangesetDatabase(
1048 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_STORE
),
1049 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_INDEX
),
1052 for changeset
in self
.get_changesets():
1053 changesets_revordered_db
.store(changeset
)
1055 changesets_revordered_db
.close()
1056 Ctx()._cvs
_items
_db
.close()
1057 Ctx()._symbol
_db
.close()
1058 Ctx()._cvs
_path
_db
.close()
1060 logger
.quiet("Done")
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Generate every changeset from the rev-sorted changeset database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    # Find the cheapest link to break (the smallest ChangesetGraphLink):
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the SYMBROKEN mapping with the REVBROKEN one, then open it
    # for update:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      # Only SymbolChangesets participate in this cycle-breaking:
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Generate every changeset from the sym-broken changeset database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets."""

    logger.debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Partition the items into those that must come before the earliest
    # successor ("early") and those that must come after it ("late"):
    early_item_ids = []
    late_item_ids = []
    for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(id)
      else:
        early_item_ids.append(id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split the changeset if it is retrograde; return True iff split."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[id]
        for id in node.pred_ids
        if id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[id]
        for id in node.succ_ids
        if id in self.ordinals
        ]
    succ_ordinals.sort()
    if pred_ordinals and succ_ordinals \
           and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    # Find the cheapest interior link to break:
    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    .  Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle.  Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if logger.is_on(logger.DEBUG):
      logger.debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    logger.quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the ALLBROKEN mapping with the SYMBROKEN one, then open it
    # for update:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      id = ordered_changeset_map[ordinal]
      ordered_changesets.append(id)

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
      if path:
        if logger.is_on(logger.DEBUG):
          logger.debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if logger.is_on(logger.DEBUG):
          logger.debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    logger.quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    # Output of this pass:
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            ),
        )
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    # One line per changeset: "<id-in-hex> <timestamp-in-hex>":
    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    # Each line of the sorted datafile is "<id-in-hex> <timestamp-in-hex>":
    for line in open(
            artifact_manager.get_temp_file(
                config.CHANGESETS_SORTED_DATAFILE)):
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    logger.normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        logger.verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    logger.quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())

    del creator

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    logger.quiet("Done")
class SortSymbolOpeningsClosingsPass(Pass):
  """This pass was formerly known as pass6."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    logger.quiet("Sorting symbolic name source revisions...")

    def sort_key(line):
      # Sort by (symbol id, svn revnum, rest-of-line); the first two
      # fields are numeric (id is in hex):
      line = line.split(' ', 2)
      return (int(line[0], 16), int(line[1]), line[2],)

    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED
            ),
        key=sort_key,
        tempdirs=[Ctx().tmpdir],
        )
    logger.quiet("Done")
class IndexSymbolsPass(Pass):
  """This pass was formerly known as pass7."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    # Map {symbol id : offset of the symbol's first line in the file}:
    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    old_id = None
    while True:
      # Record the offset *before* reading the line, so that the
      # recorded position points at the line itself:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
      id, svn_revnum, ignored = line.split(" ", 2)
      id = int(id, 16)
      if id != old_id:
        logger.verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
        old_id = id
        offsets[id] = fpos

    f.close()

    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    cPickle.dump(offsets, offsets_db, -1)
    offsets_db.close()

  def run(self, run_options, stats_keeper):
    logger.quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    logger.quiet("Done.")
class OutputPass(Pass):
  """This pass was formerly known as pass8."""

  def register_artifacts(self):
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_PATHS_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    # The output option may need artifacts of its own:
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # Emit the commits in revision-number order until the persistence
    # manager runs out of commits:
    svn_revnum = 1
    svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
    while svn_commit:
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_path_db.close()
1774 # The list of passes constituting a run of cvs2svn:
1777 CleanMetadataPass(),
1778 CollateSymbolsPass(),
1779 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1780 FilterSymbolsPass(),
1781 SortRevisionsPass(),
1783 InitializeChangesetsPass(),
1784 #CheckIndexedItemStoreDependenciesPass(
1785 # config.CVS_ITEMS_SORTED_STORE,
1786 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1787 BreakRevisionChangesetCyclesPass(),
1788 RevisionTopologicalSortPass(),
1789 BreakSymbolChangesetCyclesPass(),
1790 BreakAllChangesetCyclesPass(),
1791 TopologicalSortPass(),
1793 SortSymbolOpeningsClosingsPass(),