1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
24 from cvs2svn_lib
import config
25 from cvs2svn_lib
.context
import Ctx
26 from cvs2svn_lib
.common
import warning_prefix
27 from cvs2svn_lib
.common
import FatalException
28 from cvs2svn_lib
.common
import FatalError
29 from cvs2svn_lib
.common
import InternalError
30 from cvs2svn_lib
.common
import DB_OPEN_NEW
31 from cvs2svn_lib
.common
import DB_OPEN_READ
32 from cvs2svn_lib
.common
import DB_OPEN_WRITE
33 from cvs2svn_lib
.common
import Timestamper
34 from cvs2svn_lib
.sort
import sort_file
35 from cvs2svn_lib
.log
import Log
36 from cvs2svn_lib
.pass_manager
import Pass
37 from cvs2svn_lib
.serializer
import PrimedPickleSerializer
38 from cvs2svn_lib
.artifact_manager
import artifact_manager
39 from cvs2svn_lib
.cvs_file_database
import CVSFileDatabase
40 from cvs2svn_lib
.metadata_database
import MetadataDatabase
41 from cvs2svn_lib
.project
import read_projects
42 from cvs2svn_lib
.project
import write_projects
43 from cvs2svn_lib
.symbol
import LineOfDevelopment
44 from cvs2svn_lib
.symbol
import Trunk
45 from cvs2svn_lib
.symbol
import Symbol
46 from cvs2svn_lib
.symbol
import Branch
47 from cvs2svn_lib
.symbol
import Tag
48 from cvs2svn_lib
.symbol
import ExcludedSymbol
49 from cvs2svn_lib
.symbol_database
import SymbolDatabase
50 from cvs2svn_lib
.symbol_database
import create_symbol_database
51 from cvs2svn_lib
.symbol_statistics
import SymbolPlanError
52 from cvs2svn_lib
.symbol_statistics
import IndeterminateSymbolException
53 from cvs2svn_lib
.symbol_statistics
import SymbolStatistics
54 from cvs2svn_lib
.cvs_item
import CVSRevision
55 from cvs2svn_lib
.cvs_item
import CVSSymbol
56 from cvs2svn_lib
.cvs_item_database
import OldCVSItemStore
57 from cvs2svn_lib
.cvs_item_database
import IndexedCVSItemStore
58 from cvs2svn_lib
.cvs_item_database
import cvs_item_primer
59 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib
.key_generator
import KeyGenerator
64 from cvs2svn_lib
.changeset
import RevisionChangeset
65 from cvs2svn_lib
.changeset
import OrderedChangeset
66 from cvs2svn_lib
.changeset
import SymbolChangeset
67 from cvs2svn_lib
.changeset
import BranchChangeset
68 from cvs2svn_lib
.changeset
import create_symbol_changeset
69 from cvs2svn_lib
.changeset_graph
import ChangesetGraph
70 from cvs2svn_lib
.changeset_graph_link
import ChangesetGraphLink
71 from cvs2svn_lib
.changeset_database
import ChangesetDatabase
72 from cvs2svn_lib
.changeset_database
import CVSItemToChangesetTable
73 from cvs2svn_lib
.svn_commit
import SVNRevisionCommit
74 from cvs2svn_lib
.openings_closings
import SymbolingsLogger
75 from cvs2svn_lib
.svn_commit_creator
import SVNCommitCreator
76 from cvs2svn_lib
.persistence_manager
import PersistenceManager
77 from cvs2svn_lib
.collect_data
import CollectData
78 from cvs2svn_lib
.check_dependencies_pass \
79 import CheckItemStoreDependenciesPass
80 from cvs2svn_lib
.check_dependencies_pass \
81 import CheckIndexedItemStoreDependenciesPass
84 class CollectRevsPass(Pass
):
85 """This pass was formerly known as pass1."""
def register_artifacts(self):
    """Register the temporary files and helper artifacts of this pass.

    Every config constant listed below is handed, in order, to
    self._register_temp_file(); afterwards the revision recorder held
    by the context is asked to register its own artifacts against this
    pass."""

    # NOTE(review): presumably these are the files that this pass
    # creates -- confirm against Pass._register_temp_file().
    for basename in (
        config.PROJECTS,
        config.SYMBOL_STATISTICS,
        config.METADATA_INDEX_TABLE,
        config.METADATA_STORE,
        config.CVS_FILES_DB,
        config.CVS_ITEMS_STORE,
        ):
        self._register_temp_file(basename)

    recorder = Ctx().revision_recorder
    recorder.register_artifacts(self)
96 def run(self
, run_options
, stats_keeper
):
97 Log().quiet("Examining all CVS ',v' files...")
99 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_NEW
)
100 cd
= CollectData(Ctx().revision_recorder
, stats_keeper
)
101 for project
in run_options
.projects
:
102 cd
.process_project(project
)
103 run_options
.projects
= None
105 fatal_errors
= cd
.close()
108 raise FatalException("Pass 1 complete.\n"
111 + "\n".join(fatal_errors
) + "\n"
112 + "Exited due to fatal error(s).")
114 Ctx()._cvs
_file
_db
.close()
115 write_projects(artifact_manager
.get_temp_file(config
.PROJECTS
))
119 class CleanMetadataPass(Pass
):
120 """Clean up CVS revision metadata and write it to a new database."""
def register_artifacts(self):
    """Register the temporary files handled by the metadata cleaner.

    The "clean" metadata index table and store go through
    self._register_temp_file(); the original metadata index table and
    store go through self._register_temp_file_needed()."""

    # NOTE(review): presumably the first group is written by this pass
    # and the second group is read from an earlier pass -- confirm
    # against the Pass base class.
    for basename in (
        config.METADATA_CLEAN_INDEX_TABLE,
        config.METADATA_CLEAN_STORE,
        ):
        self._register_temp_file(basename)

    for basename in (
        config.METADATA_INDEX_TABLE,
        config.METADATA_STORE,
        ):
        self._register_temp_file_needed(basename)
128 def _get_clean_author(self
, author
):
129 """Return AUTHOR, converted appropriately to UTF8.
131 Raise a UnicodeError if it cannot be converted using the
132 configured cvs_author_decoder."""
135 return self
._authors
[author
]
140 clean_author
= Ctx().cvs_author_decoder(author
)
142 self
._authors
[author
] = author
143 raise UnicodeError('Problem decoding author \'%s\'' % (author
,))
146 clean_author
= clean_author
.encode('utf8')
148 self
._authors
[author
] = author
149 raise UnicodeError('Problem encoding author \'%s\'' % (author
,))
151 self
._authors
[author
] = clean_author
154 def _get_clean_log_msg(self
, log_msg
):
155 """Return LOG_MSG, converted appropriately to UTF8.
157 Raise a UnicodeError if it cannot be converted using the
158 configured cvs_log_decoder."""
161 clean_log_msg
= Ctx().cvs_log_decoder(log_msg
)
164 'Problem decoding log message:\n'
168 % ('-' * 75, log_msg
, '-' * 75,)
172 return clean_log_msg
.encode('utf8')
175 'Problem encoding log message:\n'
179 % ('-' * 75, log_msg
, '-' * 75,)
182 def _clean_metadata(self
, metadata
):
183 """Clean up METADATA by overwriting its members as necessary."""
186 metadata
.author
= self
._get
_clean
_author
(metadata
.author
)
187 except UnicodeError, e
:
188 Log().warn('%s: %s' % (warning_prefix
, e
,))
192 metadata
.log_msg
= self
._get
_clean
_log
_msg
(metadata
.log_msg
)
193 except UnicodeError, e
:
194 Log().warn('%s: %s' % (warning_prefix
, e
,))
197 def run(self
, run_options
, stats_keeper
):
198 Log().quiet("Converting metadata to UTF8...")
199 metadata_db
= MetadataDatabase(
200 artifact_manager
.get_temp_file(config
.METADATA_STORE
),
201 artifact_manager
.get_temp_file(config
.METADATA_INDEX_TABLE
),
204 metadata_clean_db
= MetadataDatabase(
205 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
206 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
210 self
.warnings
= False
212 # A map {author : clean_author} for those known (to avoid
213 # repeating warnings):
216 for id in metadata_db
.iterkeys():
217 metadata
= metadata_db
[id]
219 # Record the original author name because it might be needed for
220 # expanding CVS keywords:
221 metadata
.original_author
= metadata
.author
223 self
._clean
_metadata
(metadata
)
225 metadata_clean_db
[id] = metadata
229 'There were warnings converting author names and/or log messages\n'
230 'to Unicode (see messages above). Please restart this pass\n'
231 'with one or more \'--encoding\' parameters or with\n'
232 '\'--fallback-encoding\'.'
235 metadata_clean_db
.close()
240 class CollateSymbolsPass(Pass
):
241 """Divide symbols into branches, tags, and excludes."""
247 ExcludedSymbol
: 'exclude',
def register_artifacts(self):
    """Register the temporary files handled by the symbol collation.

    The symbol database goes through self._register_temp_file(); the
    projects file and the symbol statistics go through
    self._register_temp_file_needed()."""

    # NOTE(review): presumably SYMBOL_DB is the output of this pass and
    # the other two files are inputs -- confirm against the Pass base
    # class.
    self._register_temp_file(config.SYMBOL_DB)

    for basename in (config.PROJECTS, config.SYMBOL_STATISTICS):
        self._register_temp_file_needed(basename)
256 def get_symbol(self
, run_options
, stats
):
257 """Use StrategyRules to decide what to do with a symbol.
259 STATS is an instance of symbol_statistics._Stats describing an
260 instance of Symbol or Trunk. To determine how the symbol is to be
261 converted, consult the StrategyRules in the project's
262 symbol_strategy_rules. Each rule is allowed a chance to change
263 the way the symbol will be converted. If the symbol is not a
264 Trunk or TypedSymbol after all rules have run, raise
265 IndeterminateSymbolException."""
268 rules
= run_options
.project_symbol_strategy_rules
[symbol
.project
.id]
270 symbol
= rule
.get_symbol(symbol
, stats
)
271 assert symbol
is not None
273 stats
.check_valid(symbol
)
277 def log_symbol_summary(self
, stats
, symbol
):
278 if not self
.symbol_info_file
:
281 if isinstance(symbol
, Trunk
):
283 preferred_parent_name
= '.'
285 name
= stats
.lod
.name
286 if symbol
.preferred_parent_id
is None:
287 preferred_parent_name
= '.'
289 preferred_parent
= self
.symbol_stats
[symbol
.preferred_parent_id
].lod
290 if isinstance(preferred_parent
, Trunk
):
291 preferred_parent_name
= '.trunk.'
293 preferred_parent_name
= preferred_parent
.name
295 if isinstance(symbol
, LineOfDevelopment
) and symbol
.base_path
:
296 symbol_path
= symbol
.base_path
300 self
.symbol_info_file
.write(
301 '%-5d %-30s %-10s %s %s\n' % (
302 stats
.lod
.project
.id,
304 self
.conversion_names
[symbol
.__class
__],
306 preferred_parent_name
,
309 self
.symbol_info_file
.write(' # %s\n' % (stats
,))
310 parent_counts
= stats
.possible_parents
.items()
312 self
.symbol_info_file
.write(' # Possible parents:\n')
313 parent_counts
.sort(lambda a
,b
: cmp((b
[1], a
[0]), (a
[1], b
[0])))
314 for (pp
, count
) in parent_counts
:
315 if isinstance(pp
, Trunk
):
316 self
.symbol_info_file
.write(
317 ' # .trunk. : %d\n' % (count
,)
320 self
.symbol_info_file
.write(
321 ' # %s : %d\n' % (pp
.name
, count
,)
324 def get_symbols(self
, run_options
):
325 """Return a map telling how to convert symbols.
327 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
328 indicating how each symbol should be converted. Trunk objects in
329 SYMBOL_STATS are passed through unchanged. One object is included
330 in the return value for each line of development described in
333 Raise FatalError if there was an error."""
338 if Ctx().symbol_info_filename
is not None:
339 self
.symbol_info_file
= open(Ctx().symbol_info_filename
, 'w')
340 self
.symbol_info_file
.write(
341 '# Columns: project_id symbol_name conversion symbol_path '
342 'preferred_parent_name\n'
345 self
.symbol_info_file
= None
347 # Initialize each symbol strategy rule a single time, even if it
348 # is used in more than one project. First define a map from
349 # object id to symbol strategy rule:
351 for rule_list
in run_options
.project_symbol_strategy_rules
:
352 for rule
in rule_list
:
353 rules
[id(rule
)] = rule
355 for rule
in rules
.itervalues():
356 rule
.start(self
.symbol_stats
)
360 for stats
in self
.symbol_stats
:
362 symbol
= self
.get_symbol(run_options
, stats
)
363 except IndeterminateSymbolException
, e
:
364 self
.log_symbol_summary(stats
, stats
.lod
)
365 mismatches
.append(e
.stats
)
366 except SymbolPlanError
, e
:
367 self
.log_symbol_summary(stats
, stats
.lod
)
370 self
.log_symbol_summary(stats
, symbol
)
371 retval
[stats
.lod
] = symbol
373 for rule
in rules
.itervalues():
376 if self
.symbol_info_file
:
377 self
.symbol_info_file
.close()
379 del self
.symbol_info_file
381 if errors
or mismatches
:
382 s
= ['Problems determining how symbols should be converted:\n']
384 s
.append('%s\n' % (e
,))
387 'It is not clear how the following symbols '
388 'should be converted.\n'
389 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
391 '--symbol-default to resolve the ambiguity.\n'
393 for stats
in mismatches
:
394 s
.append(' %s\n' % (stats
,))
395 raise FatalError(''.join(s
))
399 def run(self
, run_options
, stats_keeper
):
400 Ctx()._projects
= read_projects(
401 artifact_manager
.get_temp_file(config
.PROJECTS
)
403 self
.symbol_stats
= SymbolStatistics(
404 artifact_manager
.get_temp_file(config
.SYMBOL_STATISTICS
)
407 symbol_map
= self
.get_symbols(run_options
)
409 # Check the symbols for consistency and bail out if there were errors:
410 self
.symbol_stats
.check_consistency(symbol_map
)
412 # Check that the symbols all have SVN paths set and that the paths
414 Ctx().output_option
.check_symbols(symbol_map
)
416 for symbol
in symbol_map
.itervalues():
417 if isinstance(symbol
, ExcludedSymbol
):
418 self
.symbol_stats
.exclude_symbol(symbol
)
420 create_symbol_database(symbol_map
.values())
422 del self
.symbol_stats
427 class FilterSymbolsPass(Pass
):
428 """Delete any branches/tags that are to be excluded.
430 Also delete revisions on excluded branches, and delete other
431 references to the excluded symbols."""
def register_artifacts(self):
    """Register the temporary files and helper artifacts of this pass.

    The summary serializer and the two summary data files go through
    self._register_temp_file(); the projects file, symbol database,
    CVS file database and CVS items store go through
    self._register_temp_file_needed().  Finally the revision excluder
    held by the context is asked to register its own artifacts against
    this pass."""

    # NOTE(review): presumably the first group is written by this pass
    # and the second group is read from earlier passes -- confirm
    # against the Pass base class.
    for basename in (
        config.SUMMARY_SERIALIZER,
        config.CVS_REVS_SUMMARY_DATAFILE,
        config.CVS_SYMBOLS_SUMMARY_DATAFILE,
        ):
        self._register_temp_file(basename)

    for basename in (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_FILES_DB,
        config.CVS_ITEMS_STORE,
        ):
        self._register_temp_file_needed(basename)

    excluder = Ctx().revision_excluder
    excluder.register_artifacts(self)
443 def run(self
, run_options
, stats_keeper
):
444 Ctx()._projects
= read_projects(
445 artifact_manager
.get_temp_file(config
.PROJECTS
)
447 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_READ
)
448 Ctx()._symbol
_db
= SymbolDatabase()
449 cvs_item_store
= OldCVSItemStore(
450 artifact_manager
.get_temp_file(config
.CVS_ITEMS_STORE
))
452 cvs_item_serializer
= PrimedPickleSerializer(cvs_item_primer
)
453 f
= open(artifact_manager
.get_temp_file(config
.SUMMARY_SERIALIZER
), 'wb')
454 cPickle
.dump(cvs_item_serializer
, f
, -1)
457 rev_db
= NewSortableCVSRevisionDatabase(
458 artifact_manager
.get_temp_file(config
.CVS_REVS_SUMMARY_DATAFILE
),
462 symbol_db
= NewSortableCVSSymbolDatabase(
463 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_SUMMARY_DATAFILE
),
467 revision_excluder
= Ctx().revision_excluder
469 Log().quiet("Filtering out excluded symbols and summarizing items...")
471 stats_keeper
.reset_cvs_rev_info()
472 revision_excluder
.start()
474 # Process the cvs items store one file at a time:
475 for cvs_file_items
in cvs_item_store
.iter_cvs_file_items():
476 Log().verbose(cvs_file_items
.cvs_file
.filename
)
477 cvs_file_items
.filter_excluded_symbols(revision_excluder
)
478 cvs_file_items
.mutate_symbols()
479 cvs_file_items
.adjust_parents()
480 cvs_file_items
.refine_symbols()
481 cvs_file_items
.record_opened_symbols()
482 cvs_file_items
.record_closed_symbols()
483 cvs_file_items
.check_link_consistency()
485 # Store whatever is left to the new file and update statistics:
486 stats_keeper
.record_cvs_file(cvs_file_items
.cvs_file
)
487 for cvs_item
in cvs_file_items
.values():
488 stats_keeper
.record_cvs_item(cvs_item
)
490 if isinstance(cvs_item
, CVSRevision
):
492 elif isinstance(cvs_item
, CVSSymbol
):
493 symbol_db
.add(cvs_item
)
495 stats_keeper
.set_stats_reflect_exclude(True)
499 revision_excluder
.finish()
500 cvs_item_store
.close()
501 Ctx()._symbol
_db
.close()
502 Ctx()._cvs
_file
_db
.close()
507 class SortRevisionSummaryPass(Pass
):
508 """Sort the revision summary file."""
def register_artifacts(self):
    """Register the sorted and unsorted revision summary files.

    The sorted data file goes through self._register_temp_file() and
    the unsorted one through self._register_temp_file_needed()."""

    # NOTE(review): presumably "sorted" is the output and "unsorted" is
    # the input of this pass -- confirm against the Pass base class.
    produced = config.CVS_REVS_SUMMARY_SORTED_DATAFILE
    required = config.CVS_REVS_SUMMARY_DATAFILE

    self._register_temp_file(produced)
    self._register_temp_file_needed(required)
514 def run(self
, run_options
, stats_keeper
):
515 Log().quiet("Sorting CVS revision summaries...")
517 artifact_manager
.get_temp_file(config
.CVS_REVS_SUMMARY_DATAFILE
),
518 artifact_manager
.get_temp_file(
519 config
.CVS_REVS_SUMMARY_SORTED_DATAFILE
521 tempdirs
=[Ctx().tmpdir
],
526 class SortSymbolSummaryPass(Pass
):
527 """Sort the symbol summary file."""
def register_artifacts(self):
    """Register the sorted and unsorted symbol summary files.

    The sorted data file goes through self._register_temp_file() and
    the unsorted one through self._register_temp_file_needed()."""

    # NOTE(review): presumably "sorted" is the output and "unsorted" is
    # the input of this pass -- confirm against the Pass base class.
    produced = config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
    required = config.CVS_SYMBOLS_SUMMARY_DATAFILE

    self._register_temp_file(produced)
    self._register_temp_file_needed(required)
533 def run(self
, run_options
, stats_keeper
):
534 Log().quiet("Sorting CVS symbol summaries...")
536 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_SUMMARY_DATAFILE
),
537 artifact_manager
.get_temp_file(
538 config
.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
540 tempdirs
=[Ctx().tmpdir
],
545 class InitializeChangesetsPass(Pass
):
546 """Create preliminary CommitSets."""
def register_artifacts(self):
    """Register the temporary files handled by changeset initialization.

    The item-to-changeset table, the changeset store and index, and the
    sorted CVS item store and index table go through
    self._register_temp_file().  The projects file, symbol database,
    CVS file database, summary serializer and the two sorted summary
    data files go through self._register_temp_file_needed()."""

    # NOTE(review): presumably the first group is written by this pass
    # and the second group is read from earlier passes -- confirm
    # against the Pass base class.
    for basename in (
        config.CVS_ITEM_TO_CHANGESET,
        config.CHANGESETS_STORE,
        config.CHANGESETS_INDEX,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        ):
        self._register_temp_file(basename)

    for basename in (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_FILES_DB,
        config.SUMMARY_SERIALIZER,
        config.CVS_REVS_SUMMARY_SORTED_DATAFILE,
        config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE,
        ):
        self._register_temp_file_needed(basename)
562 def get_revision_changesets(self
):
563 """Generate revision changesets, one at a time.
565 Each time, yield a list of CVSRevisions that might potentially
566 constitute a changeset."""
568 # Create changesets for CVSRevisions:
569 old_metadata_id
= None
573 db
= OldSortableCVSRevisionDatabase(
574 artifact_manager
.get_temp_file(
575 config
.CVS_REVS_SUMMARY_SORTED_DATAFILE
577 self
.cvs_item_serializer
,
581 if cvs_rev
.metadata_id
!= old_metadata_id \
582 or cvs_rev
.timestamp
> old_timestamp
+ config
.COMMIT_THRESHOLD
:
583 # Start a new changeset. First finish up the old changeset,
586 yield changeset_items
588 old_metadata_id
= cvs_rev
.metadata_id
589 changeset_items
.append(cvs_rev
)
590 old_timestamp
= cvs_rev
.timestamp
592 # Finish up the last changeset, if any:
594 yield changeset_items
596 def get_symbol_changesets(self
):
597 """Generate symbol changesets, one at a time.
599 Each time, yield a list of CVSSymbols that might potentially
600 constitute a changeset."""
605 db
= OldSortableCVSSymbolDatabase(
606 artifact_manager
.get_temp_file(
607 config
.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
609 self
.cvs_item_serializer
,
612 for cvs_symbol
in db
:
613 if cvs_symbol
.symbol
.id != old_symbol_id
:
614 # Start a new changeset. First finish up the old changeset,
617 yield changeset_items
619 old_symbol_id
= cvs_symbol
.symbol
.id
620 changeset_items
.append(cvs_symbol
)
622 # Finish up the last changeset, if any:
624 yield changeset_items
627 def compare_items(a
, b
):
629 cmp(a
.timestamp
, b
.timestamp
)
630 or cmp(a
.cvs_file
.cvs_path
, b
.cvs_file
.cvs_path
)
631 or cmp([int(x
) for x
in a
.rev
.split('.')],
632 [int(x
) for x
in b
.rev
.split('.')])
635 def break_internal_dependencies(self
, changeset_items
):
636 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
638 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
639 belong in a single RevisionChangeset, but there might be internal
640 dependencies among the items. Return a list of lists, where each
641 sublist is a list of CVSRevisions and at least one internal
642 dependency has been eliminated. Iff CHANGESET_ITEMS does not have
643 to be split, then the return value will contain a single value,
644 namely the original value of CHANGESET_ITEMS. Split
645 CHANGESET_ITEMS at most once, even though the resulting changesets
646 might themselves have internal dependencies."""
648 # We only look for succ dependencies, since by doing so we
649 # automatically cover pred dependencies as well. First create a
650 # list of tuples (pred, succ) of id pairs for CVSItems that depend
653 changeset_cvs_item_ids
= set([cvs_rev
.id for cvs_rev
in changeset_items
])
654 for cvs_item
in changeset_items
:
655 for next_id
in cvs_item
.get_succ_ids():
656 if next_id
in changeset_cvs_item_ids
:
657 # Sanity check: a CVSItem should never depend on itself:
658 if next_id
== cvs_item
.id:
659 raise InternalError('Item depends on itself: %s' % (cvs_item
,))
661 dependencies
.append((cvs_item
.id, next_id
,))
664 # Sort the changeset_items in a defined order (chronological to the
665 # extent that the timestamps are correct and unique).
666 changeset_items
.sort(self
.compare_items
)
668 for (i
, changeset_item
) in enumerate(changeset_items
):
669 indexes
[changeset_item
.id] = i
670 # How many internal dependencies would be broken by breaking the
671 # Changeset after a particular index?
672 breaks
= [0] * len(changeset_items
)
673 for (pred
, succ
,) in dependencies
:
674 pred_index
= indexes
[pred
]
675 succ_index
= indexes
[succ
]
676 breaks
[min(pred_index
, succ_index
)] += 1
677 breaks
[max(pred_index
, succ_index
)] -= 1
681 for i
in range(1, len(breaks
)):
682 breaks
[i
] += breaks
[i
- 1]
683 for i
in range(0, len(breaks
) - 1):
684 if breaks
[i
] > best_count
:
686 best_count
= breaks
[i
]
687 best_time
= (changeset_items
[i
+ 1].timestamp
688 - changeset_items
[i
].timestamp
)
689 elif breaks
[i
] == best_count \
690 and (changeset_items
[i
+ 1].timestamp
691 - changeset_items
[i
].timestamp
) < best_time
:
693 best_count
= breaks
[i
]
694 best_time
= (changeset_items
[i
+ 1].timestamp
695 - changeset_items
[i
].timestamp
)
696 # Reuse the old changeset.id for the first of the split changesets.
697 return [changeset_items
[:best_i
+ 1], changeset_items
[best_i
+ 1:]]
699 return [changeset_items
]
701 def break_all_internal_dependencies(self
, changeset_items
):
702 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
704 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
705 be part of a single changeset. Break this list into sublists,
706 where the CVSRevisions in each sublist are free of mutual
709 # This method is written non-recursively to avoid any possible
710 # problems with recursion depth.
712 changesets_to_split
= [changeset_items
]
713 while changesets_to_split
:
714 changesets
= self
.break_internal_dependencies(changesets_to_split
.pop())
715 if len(changesets
) == 1:
716 [changeset_items
] = changesets
717 yield changeset_items
719 # The changeset had to be split; see if either of the
720 # fragments have to be split:
722 changesets_to_split
.extend(changesets
)
724 def get_changesets(self
):
725 """Generate (Changeset, [CVSItem,...]) for all changesets.
727 The Changesets already have their internal dependencies broken.
728 The [CVSItem,...] list is the list of CVSItems in the
729 corresponding Changeset."""
731 for changeset_items
in self
.get_revision_changesets():
732 for split_changeset_items \
733 in self
.break_all_internal_dependencies(changeset_items
):
736 self
.changeset_key_generator
.gen_id(),
737 [cvs_rev
.id for cvs_rev
in split_changeset_items
]
739 split_changeset_items
,
742 for changeset_items
in self
.get_symbol_changesets():
744 create_symbol_changeset(
745 self
.changeset_key_generator
.gen_id(),
746 changeset_items
[0].symbol
,
747 [cvs_symbol
.id for cvs_symbol
in changeset_items
]
752 def run(self
, run_options
, stats_keeper
):
753 Log().quiet("Creating preliminary commit sets...")
755 Ctx()._projects
= read_projects(
756 artifact_manager
.get_temp_file(config
.PROJECTS
)
758 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_READ
)
759 Ctx()._symbol
_db
= SymbolDatabase()
761 f
= open(artifact_manager
.get_temp_file(config
.SUMMARY_SERIALIZER
), 'rb')
762 self
.cvs_item_serializer
= cPickle
.load(f
)
765 changeset_db
= ChangesetDatabase(
766 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
767 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
770 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
771 artifact_manager
.get_temp_file(config
.CVS_ITEM_TO_CHANGESET
),
775 self
.sorted_cvs_items_db
= IndexedCVSItemStore(
776 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
777 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
780 self
.changeset_key_generator
= KeyGenerator()
782 for (changeset
, changeset_items
) in self
.get_changesets():
783 if Log().is_on(Log
.DEBUG
):
784 Log().debug(repr(changeset
))
785 changeset_db
.store(changeset
)
786 for cvs_item
in changeset_items
:
787 self
.sorted_cvs_items_db
.add(cvs_item
)
788 cvs_item_to_changeset_id
[cvs_item
.id] = changeset
.id
790 self
.sorted_cvs_items_db
.close()
791 cvs_item_to_changeset_id
.close()
793 Ctx()._symbol
_db
.close()
794 Ctx()._cvs
_file
_db
.close()
796 del self
.cvs_item_serializer
801 class ProcessedChangesetLogger
:
803 self
.processed_changeset_ids
= []
def log(self, changeset_id):
    """Remember CHANGESET_ID, but only while debug logging is enabled.

    When Log().is_on(Log.DEBUG) is false the call has no effect;
    otherwise CHANGESET_ID is appended to
    self.processed_changeset_ids."""

    if not Log().is_on(Log.DEBUG):
        return

    self.processed_changeset_ids.append(changeset_id)
810 if self
.processed_changeset_ids
:
812 'Consumed changeset ids %s'
813 % (', '.join(['%x' % id for id in self
.processed_changeset_ids
]),))
815 del self
.processed_changeset_ids
[:]
818 class BreakRevisionChangesetCyclesPass(Pass
):
819 """Break up any dependency cycles involving only RevisionChangesets."""
def register_artifacts(self):
    """Register the temporary files handled by revision cycle breaking.

    The "revbroken" changeset store, index and item-to-changeset table
    go through self._register_temp_file().  The projects file, symbol
    database, CVS file database, sorted CVS item store and index table,
    and the original changeset store, index and item-to-changeset table
    go through self._register_temp_file_needed()."""

    # NOTE(review): presumably the first group is written by this pass
    # and the second group is read from earlier passes -- confirm
    # against the Pass base class.
    for basename in (
        config.CHANGESETS_REVBROKEN_STORE,
        config.CHANGESETS_REVBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
        ):
        self._register_temp_file(basename)

    for basename in (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_FILES_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_STORE,
        config.CHANGESETS_INDEX,
        config.CVS_ITEM_TO_CHANGESET,
        ):
        self._register_temp_file_needed(basename)
834 def get_source_changesets(self
):
835 old_changeset_db
= ChangesetDatabase(
836 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
837 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
840 changeset_ids
= old_changeset_db
.keys()
842 for changeset_id
in changeset_ids
:
843 yield old_changeset_db
[changeset_id
]
845 old_changeset_db
.close()
848 def break_cycle(self
, cycle
):
849 """Break up one or more changesets in CYCLE to help break the cycle.
851 CYCLE is a list of Changesets where
853 cycle[i] depends on cycle[i - 1]
855 Break up one or more changesets in CYCLE to make progress towards
856 breaking the cycle. Update self.changeset_graph accordingly.
858 It is not guaranteed that the cycle will be broken by one call to
859 this routine, but at least some progress must be made."""
861 self
.processed_changeset_logger
.flush()
864 for i
in range(len(cycle
)):
865 # It's OK if this index wraps to -1:
866 link
= ChangesetGraphLink(
867 cycle
[i
- 1], cycle
[i
], cycle
[i
+ 1 - len(cycle
)])
869 if best_i
is None or link
< best_link
:
873 if Log().is_on(Log
.DEBUG
):
875 'Breaking cycle %s by breaking node %x' % (
876 ' -> '.join(['%x' % node
.id for node
in (cycle
+ [cycle
[0]])]),
877 best_link
.changeset
.id,))
879 new_changesets
= best_link
.break_changeset(self
.changeset_key_generator
)
881 self
.changeset_graph
.delete_changeset(best_link
.changeset
)
883 for changeset
in new_changesets
:
884 self
.changeset_graph
.add_new_changeset(changeset
)
886 def run(self
, run_options
, stats_keeper
):
887 Log().quiet("Breaking revision changeset dependency cycles...")
889 Ctx()._projects
= read_projects(
890 artifact_manager
.get_temp_file(config
.PROJECTS
)
892 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_READ
)
893 Ctx()._symbol
_db
= SymbolDatabase()
894 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
895 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
896 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
900 artifact_manager
.get_temp_file(
901 config
.CVS_ITEM_TO_CHANGESET
),
902 artifact_manager
.get_temp_file(
903 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
))
904 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
905 artifact_manager
.get_temp_file(
906 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
),
909 changeset_db
= ChangesetDatabase(
910 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
911 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
914 self
.changeset_graph
= ChangesetGraph(
915 changeset_db
, cvs_item_to_changeset_id
919 for changeset
in self
.get_source_changesets():
920 changeset_db
.store(changeset
)
921 if isinstance(changeset
, RevisionChangeset
):
922 self
.changeset_graph
.add_changeset(changeset
)
923 max_changeset_id
= max(max_changeset_id
, changeset
.id)
925 self
.changeset_key_generator
= KeyGenerator(max_changeset_id
+ 1)
927 self
.processed_changeset_logger
= ProcessedChangesetLogger()
929 # Consume the graph, breaking cycles using self.break_cycle():
930 for (changeset
, time_range
) in self
.changeset_graph
.consume_graph(
931 cycle_breaker
=self
.break_cycle
933 self
.processed_changeset_logger
.log(changeset
.id)
935 self
.processed_changeset_logger
.flush()
936 del self
.processed_changeset_logger
938 self
.changeset_graph
.close()
939 self
.changeset_graph
= None
940 Ctx()._cvs
_items
_db
.close()
941 Ctx()._symbol
_db
.close()
942 Ctx()._cvs
_file
_db
.close()
947 class RevisionTopologicalSortPass(Pass
):
948 """Sort RevisionChangesets into commit order.
950 Also convert them to OrderedChangesets, without changing their ids."""
def register_artifacts(self):
    """Register the temporary files handled by the revision sort.

    The "revsorted" changeset store and index go through
    self._register_temp_file().  The projects file, symbol database,
    CVS file database, sorted CVS item store and index table, and the
    "revbroken" changeset store, index and item-to-changeset table go
    through self._register_temp_file_needed()."""

    # NOTE(review): presumably the first group is written by this pass
    # and the second group is read from earlier passes -- confirm
    # against the Pass base class.
    for basename in (
        config.CHANGESETS_REVSORTED_STORE,
        config.CHANGESETS_REVSORTED_INDEX,
        ):
        self._register_temp_file(basename)

    for basename in (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_FILES_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_REVBROKEN_STORE,
        config.CHANGESETS_REVBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
        ):
        self._register_temp_file_needed(basename)
def get_source_changesets(self, changeset_db):
    """Generate the changesets stored in CHANGESET_DB, one at a time.

    The ids are obtained from CHANGESET_DB.keys() when iteration
    starts, and each changeset is then fetched by indexing
    CHANGESET_DB with its id."""

    for key in changeset_db.keys():
        yield changeset_db[key]
970 def get_changesets(self
):
971 changeset_db
= ChangesetDatabase(
972 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
973 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
977 changeset_graph
= ChangesetGraph(
979 CVSItemToChangesetTable(
980 artifact_manager
.get_temp_file(
981 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
987 for changeset
in self
.get_source_changesets(changeset_db
):
988 if isinstance(changeset
, RevisionChangeset
):
989 changeset_graph
.add_changeset(changeset
)
996 changeset_ids
.append(None)
998 for (changeset
, time_range
) in changeset_graph
.consume_graph():
999 changeset_ids
.append(changeset
.id)
1002 changeset_ids
.append(None)
1004 for i
in range(1, len(changeset_ids
) - 1):
1005 changeset
= changeset_db
[changeset_ids
[i
]]
1006 yield OrderedChangeset(
1007 changeset
.id, changeset
.cvs_item_ids
, i
- 1,
1008 changeset_ids
[i
- 1], changeset_ids
[i
+ 1])
1010 changeset_graph
.close()
1012 def run(self
, run_options
, stats_keeper
):
1013 Log().quiet("Generating CVSRevisions in commit order...")
1015 Ctx()._projects
= read_projects(
1016 artifact_manager
.get_temp_file(config
.PROJECTS
)
1018 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_READ
)
1019 Ctx()._symbol
_db
= SymbolDatabase()
1020 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1021 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1022 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1025 changesets_revordered_db
= ChangesetDatabase(
1026 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_STORE
),
1027 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_INDEX
),
1030 for changeset
in self
.get_changesets():
1031 changesets_revordered_db
.store(changeset
)
1033 changesets_revordered_db
.close()
1034 Ctx()._cvs
_items
_db
.close()
1035 Ctx()._symbol
_db
.close()
1036 Ctx()._cvs
_file
_db
.close()
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    """Register the temporary files that this pass writes and reads."""

    # Files created by this pass:
    for created_file in [
        config.CHANGESETS_SYMBROKEN_STORE,
        config.CHANGESETS_SYMBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_SYMBROKEN,
        ]:
      self._register_temp_file(created_file)

    # Files that must already exist when this pass runs:
    for needed_file in [
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_FILES_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_REVSORTED_STORE,
        config.CHANGESETS_REVSORTED_INDEX,
        config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
        ]:
      self._register_temp_file_needed(needed_file)

  def get_source_changesets(self):
    """Generate the changesets stored in the REVSORTED changeset database.

    The database is closed after the last changeset was generated."""

    source_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ,
        )

    for changeset_id in source_db.keys():
      yield source_db[changeset_id]

    source_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    The link of the cycle that compares lowest is chosen, its
    changeset is broken up, and self.changeset_graph is updated with
    the resulting changesets.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    cycle_length = len(cycle)
    best_link = None
    for index in range(cycle_length):
      # Both neighbor indexes may wrap around the ends of the list;
      # that is intended, because CYCLE is circular:
      candidate = ChangesetGraphLink(
          cycle[index - 1], cycle[index], cycle[index + 1 - cycle_length])

      if best_link is None or candidate < best_link:
        best_link = candidate

    if Log().is_on(Log.DEBUG):
      node_ids = ['%x' % node.id for node in (cycle + [cycle[0]])]
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(node_ids),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for new_changeset in new_changesets:
      self.changeset_graph.add_new_changeset(new_changeset)

  def run(self, run_options, stats_keeper):
    """Copy all changesets to the SYMBROKEN database, breaking symbol cycles.

    Only SymbolChangesets are added to the graph; consuming the graph
    invokes self.break_cycle() for the cycles that are encountered."""

    Log().quiet("Breaking symbol changeset dependency cycles...")

    projects_file = artifact_manager.get_temp_file(config.PROJECTS)
    Ctx()._projects = read_projects(projects_file)
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ,
        )

    # Start from a copy of the previous pass's item-to-changeset
    # table; the copy is opened for writing:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN
            ),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN
            ),
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN
            ),
        DB_OPEN_WRITE,
        )

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW,
        )

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id,
        )

    # Track the largest id of any changeset so that newly-created
    # changesets can be given ids above it:
    largest_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      largest_id = max(largest_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(largest_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    consumed = self.changeset_graph.consume_graph(
        cycle_breaker=self.break_cycle
        )
    for (changeset, time_range) in consumed:
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    """Register the temporary files that this pass writes and reads."""

    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Generate the changesets stored in the SYMBROKEN changeset database.

    The database is closed after the last changeset was generated."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets.

    CHANGESET is removed from self.changeset_graph and replaced by an
    'early' and a 'late' changeset.  The late changeset is split again
    (recursively, via _split_if_retrograde()) if it is still
    retrograde."""

    Log().debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      # Predecessors whose changeset has no ordinal count as 0:
      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      # Successors whose changeset has no ordinal count as sys.maxint:
      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Items whose latest predecessor is not earlier than the earliest
    # successor of the whole changeset go into the late changeset:
    early_item_ids = []
    late_item_ids = []
    for (item_id, (max_pred_ordinal, min_succ_ordinal)) \
            in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(item_id)
      else:
        early_item_ids.append(item_id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split the changeset with id CHANGESET_ID if it is retrograde.

    A changeset is treated as retrograde if the largest ordinal among
    its predecessors is not smaller than the smallest ordinal among
    its successors; only neighbors listed in self.ordinals are
    considered.  Return True iff the changeset was split."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[pred_id]
        for pred_id in node.pred_ids
        if pred_id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[succ_id]
        for succ_id in node.succ_ids
        if succ_id in self.ordinals
        ]
    succ_ordinals.sort()
    if pred_ordinals and succ_ordinals \
           and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets.  The link that compares lowest is
    chosen, its changeset is broken up, and self.changeset_graph is
    updated with the resulting changesets."""

    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    .  Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle.  Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    """Copy all changesets to the ALLBROKEN database, breaking all cycles.

    Retrograde BranchChangesets are split first.  Then the graph is
    consumed; whenever no more nodes without predecessors remain, a
    path or cycle reachable from the next unprocessed ordered
    changeset is broken."""

    Log().quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Start from a copy of the previous pass's item-to-changeset
    # table; the copy is opened for writing:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      ordered_changesets.append(ordered_changeset_map[ordinal])

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    del self.ordinals

    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      ordered_id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(
          ordered_id, ordered_changeset_ids
          )
      if path:
        if Log().is_on(Log.DEBUG):
          Log().debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if Log().is_on(Log.DEBUG):
          Log().debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[ordered_id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(ordered_id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    # Close the databases that were opened at the start of this
    # method, in the same order as the other passes do:
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    """Register the temporary files that this pass writes and reads."""

    # The only file created by this pass:
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)

    # Files that must already exist when this pass runs:
    for needed_file in [
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_FILES_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_ALLBROKEN_STORE,
        config.CHANGESETS_ALLBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_ALLBROKEN,
        ]:
      self._register_temp_file_needed(needed_file)

  def get_source_changesets(self, changeset_db):
    """Generate the changesets in CHANGESET_DB, in the order of its keys()."""

    changeset_ids = changeset_db.keys()
    for changeset_id in changeset_ids:
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
            ),
        DB_OPEN_READ,
        )
    changeset_graph = ChangesetGraph(changeset_db, cvs_item_to_changeset_id)

    # The ids of all SymbolChangesets; the timestamper is told
    # whether each changeset is one of them:
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # A single Timestamper assigns all timestamps, so that it can
    # keep the series monotonically increasing:
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      latest_time = time_range.t_max
      is_symbol_changeset = changeset.id in symbol_changeset_ids
      yield (changeset, timestamper.get(latest_time, is_symbol_changeset))

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    """Write one 'changeset-id timestamp' line (in hex) per sorted changeset."""

    Log().quiet("Generating CVSRevisions in commit order...")

    projects_file = artifact_manager.get_temp_file(config.PROJECTS)
    Ctx()._projects = read_projects(projects_file)
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ,
        )

    datafile_name = artifact_manager.get_temp_file(
        config.CHANGESETS_SORTED_DATAFILE
        )
    sorted_changesets = open(datafile_name, 'w')

    for (changeset, timestamp) in self.get_changesets():
      record = '%x %08x\n' % (changeset.id, timestamp,)
      sorted_changesets.write(record)

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    """Register the temporary files that this pass writes and reads."""

    # Files created by this pass:
    for created_file in [
        config.SVN_COMMITS_INDEX_TABLE,
        config.SVN_COMMITS_STORE,
        config.CVS_REVS_TO_SVN_REVNUMS,
        config.SYMBOL_OPENINGS_CLOSINGS,
        ]:
      self._register_temp_file(created_file)

    # Files that must already exist when this pass runs:
    for needed_file in [
        config.PROJECTS,
        config.CVS_FILES_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.SYMBOL_DB,
        config.CHANGESETS_ALLBROKEN_STORE,
        config.CHANGESETS_ALLBROKEN_INDEX,
        config.CHANGESETS_SORTED_DATAFILE,
        ]:
      self._register_temp_file_needed(needed_file)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order.

    The order and the timestamps are read from the sorted changesets
    datafile, which holds one 'changeset-id timestamp' pair per line,
    both in hexadecimal."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ,
        )

    datafile_name = artifact_manager.get_temp_file(
        config.CHANGESETS_SORTED_DATAFILE
        )
    for line in open(datafile_name):
      [changeset_id, timestamp] = [int(field, 16) for field in line.split()]
      yield (changeset_db[changeset_id], timestamp)

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order.

    CREATOR is asked to process each changeset together with its
    timestamp; every SVNCommit that it produces is passed on."""

    for (changeset, timestamp) in self.get_changesets():
      svn_commits = creator.process_changeset(changeset, timestamp)
      for svn_commit in svn_commits:
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT.

    For an SVNRevisionCommit, the path and revision of each of its
    CVS revisions are additionally logged at verbose level."""

    summary = (
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )
    Log().normal(summary)

    if not isinstance(svn_commit, SVNRevisionCommit):
      return

    for cvs_rev in svn_commit.cvs_revs:
      Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    """Create, log and persist all SVNCommits, then record their count."""

    Log().quiet("Mapping CVS revisions to Subversion commits...")

    projects_file = artifact_manager.get_temp_file(config.PROJECTS)
    Ctx()._projects = read_projects(projects_file)
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ,
        )

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    last_revnum = creator.revnum_generator.get_last_id()
    stats_keeper.set_svn_rev_count(last_revnum)
    del creator

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
class SortSymbolsPass(Pass):
  """Sort the symbol openings/closings file.

  This pass was formerly known as pass6."""

  def register_artifacts(self):
    """Register the sorted output file and the unsorted input file."""

    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    """Sort SYMBOL_OPENINGS_CLOSINGS into SYMBOL_OPENINGS_CLOSINGS_SORTED."""

    Log().quiet("Sorting symbolic name source revisions...")

    def sort_key(line):
      # Each line has three space-separated fields.  The first is
      # compared as a hexadecimal number, the second as a decimal
      # number, and the rest of the line as a string:
      fields = line.split(' ', 2)
      return (int(fields[0], 16), int(fields[1]), fields[2],)

    unsorted_name = artifact_manager.get_temp_file(
        config.SYMBOL_OPENINGS_CLOSINGS
        )
    sorted_name = artifact_manager.get_temp_file(
        config.SYMBOL_OPENINGS_CLOSINGS_SORTED
        )
    sort_file(
        unsorted_name,
        sorted_name,
        key=sort_key,
        tempdirs=[Ctx().tmpdir],
        )

    Log().quiet("Done")
class IndexSymbolsPass(Pass):
  """Index the sorted symbol openings/closings file by symbol.

  This pass was formerly known as pass7."""

  def register_artifacts(self):
    """Register the offsets file written and the files read by this pass."""

    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need.

    The map is keyed by the symbol id and is written to
    SYMBOL_OFFSETS_DB as a pickle.  Both files are closed even if an
    error occurs while they are being processed."""

    # A map {symbol_id : offset of the first line for that symbol}:
    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    try:
      old_id = None
      while True:
        # The offset has to be taken before the line is read.
        # readline() is used rather than iterating over the file
        # because file iteration reads ahead, which would make
        # tell() unreliable:
        fpos = f.tell()
        line = f.readline()
        if not line:
          break
        symbol_id, svn_revnum, ignored = line.split(" ", 2)
        symbol_id = int(symbol_id, 16)
        if symbol_id != old_id:
          Log().verbose(' ', Ctx()._symbol_db.get_symbol(symbol_id).name)
          old_id = symbol_id
          offsets[symbol_id] = fpos
    finally:
      f.close()

    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    try:
      cPickle.dump(offsets, offsets_db, -1)
    finally:
      offsets_db.close()

  def run(self, run_options, stats_keeper):
    """Open the projects and symbol database, then generate the offsets."""

    Log().quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    Log().quiet("Done.")
class OutputPass(Pass):
  """Send all SVNCommits to the configured output option.

  This pass was formerly known as pass8."""

  def register_artifacts(self):
    """Register the files read by this pass and by the output option."""

    for needed_file in [
        config.PROJECTS,
        config.CVS_FILES_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.SYMBOL_DB,
        config.METADATA_CLEAN_INDEX_TABLE,
        config.METADATA_CLEAN_STORE,
        config.SVN_COMMITS_INDEX_TABLE,
        config.SVN_COMMITS_STORE,
        config.CVS_REVS_TO_SVN_REVNUMS,
        ]:
      self._register_temp_file_needed(needed_file)

    # The output option registers whatever it needs itself:
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    """Output the SVNCommits in order of revision number, starting at 1.

    The loop ends at the first revision number for which the
    persistence manager returns a false value."""

    projects_file = artifact_manager.get_temp_file(config.PROJECTS)
    Ctx()._projects = read_projects(projects_file)
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    svn_revnum = 1
    while True:
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
      if not svn_commit:
        break
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_file_db.close()
1752 # The list of passes constituting a run of cvs2svn:
1755 CleanMetadataPass(),
1756 CollateSymbolsPass(),
1757 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1758 FilterSymbolsPass(),
1759 SortRevisionSummaryPass(),
1760 SortSymbolSummaryPass(),
1761 InitializeChangesetsPass(),
1762 #CheckIndexedItemStoreDependenciesPass(
1763 # config.CVS_ITEMS_SORTED_STORE,
1764 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1765 BreakRevisionChangesetCyclesPass(),
1766 RevisionTopologicalSortPass(),
1767 BreakSymbolChangesetCyclesPass(),
1768 BreakAllChangesetCyclesPass(),
1769 TopologicalSortPass(),