1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
24 from cvs2svn_lib
import config
25 from cvs2svn_lib
.context
import Ctx
26 from cvs2svn_lib
.common
import warning_prefix
27 from cvs2svn_lib
.common
import FatalException
28 from cvs2svn_lib
.common
import FatalError
29 from cvs2svn_lib
.common
import InternalError
30 from cvs2svn_lib
.common
import DB_OPEN_NEW
31 from cvs2svn_lib
.common
import DB_OPEN_READ
32 from cvs2svn_lib
.common
import DB_OPEN_WRITE
33 from cvs2svn_lib
.common
import Timestamper
34 from cvs2svn_lib
.sort
import sort_file
35 from cvs2svn_lib
.log
import Log
36 from cvs2svn_lib
.pass_manager
import Pass
37 from cvs2svn_lib
.serializer
import PrimedPickleSerializer
38 from cvs2svn_lib
.artifact_manager
import artifact_manager
39 from cvs2svn_lib
.cvs_file_database
import CVSFileDatabase
40 from cvs2svn_lib
.metadata_database
import MetadataDatabase
41 from cvs2svn_lib
.project
import read_projects
42 from cvs2svn_lib
.project
import write_projects
43 from cvs2svn_lib
.symbol
import LineOfDevelopment
44 from cvs2svn_lib
.symbol
import Trunk
45 from cvs2svn_lib
.symbol
import Symbol
46 from cvs2svn_lib
.symbol
import Branch
47 from cvs2svn_lib
.symbol
import Tag
48 from cvs2svn_lib
.symbol
import ExcludedSymbol
49 from cvs2svn_lib
.symbol_database
import SymbolDatabase
50 from cvs2svn_lib
.symbol_database
import create_symbol_database
51 from cvs2svn_lib
.symbol_statistics
import SymbolPlanError
52 from cvs2svn_lib
.symbol_statistics
import IndeterminateSymbolException
53 from cvs2svn_lib
.symbol_statistics
import SymbolStatistics
54 from cvs2svn_lib
.cvs_item
import CVSRevision
55 from cvs2svn_lib
.cvs_item
import CVSSymbol
56 from cvs2svn_lib
.cvs_item_database
import OldCVSItemStore
57 from cvs2svn_lib
.cvs_item_database
import IndexedCVSItemStore
58 from cvs2svn_lib
.cvs_item_database
import cvs_item_primer
59 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib
.key_generator
import KeyGenerator
64 from cvs2svn_lib
.changeset
import RevisionChangeset
65 from cvs2svn_lib
.changeset
import OrderedChangeset
66 from cvs2svn_lib
.changeset
import SymbolChangeset
67 from cvs2svn_lib
.changeset
import BranchChangeset
68 from cvs2svn_lib
.changeset
import create_symbol_changeset
69 from cvs2svn_lib
.changeset_graph
import ChangesetGraph
70 from cvs2svn_lib
.changeset_graph_link
import ChangesetGraphLink
71 from cvs2svn_lib
.changeset_database
import ChangesetDatabase
72 from cvs2svn_lib
.changeset_database
import CVSItemToChangesetTable
73 from cvs2svn_lib
.svn_commit
import SVNRevisionCommit
74 from cvs2svn_lib
.openings_closings
import SymbolingsLogger
75 from cvs2svn_lib
.svn_commit_creator
import SVNCommitCreator
76 from cvs2svn_lib
.persistence_manager
import PersistenceManager
77 from cvs2svn_lib
.collect_data
import CollectData
78 from cvs2svn_lib
.check_dependencies_pass \
79 import CheckItemStoreDependenciesPass
80 from cvs2svn_lib
.check_dependencies_pass \
81 import CheckIndexedItemStoreDependenciesPass
84 class CollectRevsPass(Pass
):
85 """This pass was formerly known as pass1."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  Each listed config name is handed to self._register_temp_file(), in
  the order given.  Afterwards the revision recorder obtained from
  Ctx() is given the chance to register its own artifacts against
  this pass."""

  temp_file_names = (
      config.PROJECTS,
      config.SYMBOL_STATISTICS,
      config.METADATA_INDEX_TABLE,
      config.METADATA_STORE,
      config.CVS_FILES_DB,
      config.CVS_ITEMS_STORE,
      )
  for temp_file_name in temp_file_names:
    self._register_temp_file(temp_file_name)

  recorder = Ctx().revision_recorder
  recorder.register_artifacts(self)
96 def run(self
, run_options
, stats_keeper
):
97 Log().quiet("Examining all CVS ',v' files...")
99 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_NEW
)
100 cd
= CollectData(Ctx().revision_recorder
, stats_keeper
)
101 for project
in run_options
.projects
:
102 cd
.process_project(project
)
103 run_options
.projects
= None
105 fatal_errors
= cd
.close()
108 raise FatalException("Pass 1 complete.\n"
111 + "\n".join(fatal_errors
) + "\n"
112 + "Exited due to fatal error(s).")
114 Ctx()._cvs
_file
_db
.close()
115 write_projects(artifact_manager
.get_temp_file(config
.PROJECTS
))
119 class CleanMetadataPass(Pass
):
120 """Clean up CVS revision metadata and write it to a new database."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The cleaned metadata store and index are registered with
  self._register_temp_file(); the original metadata store and index
  are registered with self._register_temp_file_needed()."""

  for registered_name in (
      config.METADATA_CLEAN_INDEX_TABLE,
      config.METADATA_CLEAN_STORE,
      ):
    self._register_temp_file(registered_name)

  for needed_name in (
      config.METADATA_INDEX_TABLE,
      config.METADATA_STORE,
      ):
    self._register_temp_file_needed(needed_name)
128 def _get_clean_author(self
, author
):
129 """Return AUTHOR, converted appropriately to UTF8.
131 Raise a UnicodeError if it cannot be converted using the
132 configured cvs_author_decoder."""
135 return self
._authors
[author
]
140 clean_author
= Ctx().cvs_author_decoder(author
)
142 self
._authors
[author
] = author
143 raise UnicodeError('Problem decoding author \'%s\'' % (author
,))
146 clean_author
= clean_author
.encode('utf8')
148 self
._authors
[author
] = author
149 raise UnicodeError('Problem encoding author \'%s\'' % (author
,))
151 self
._authors
[author
] = clean_author
154 def _get_clean_log_msg(self
, log_msg
):
155 """Return LOG_MSG, converted appropriately to UTF8.
157 Raise a UnicodeError if it cannot be converted using the
158 configured cvs_log_decoder."""
161 clean_log_msg
= Ctx().cvs_log_decoder(log_msg
)
164 'Problem decoding log message:\n'
168 % ('-' * 75, log_msg
, '-' * 75,)
172 return clean_log_msg
.encode('utf8')
175 'Problem encoding log message:\n'
179 % ('-' * 75, log_msg
, '-' * 75,)
182 def _clean_metadata(self
, metadata
):
183 """Clean up METADATA by overwriting its members as necessary."""
186 metadata
.author
= self
._get
_clean
_author
(metadata
.author
)
187 except UnicodeError, e
:
188 Log().warn('%s: %s' % (warning_prefix
, e
,))
192 metadata
.log_msg
= self
._get
_clean
_log
_msg
(metadata
.log_msg
)
193 except UnicodeError, e
:
194 Log().warn('%s: %s' % (warning_prefix
, e
,))
197 def run(self
, run_options
, stats_keeper
):
198 Log().quiet("Converting metadata to UTF8...")
199 metadata_db
= MetadataDatabase(
200 artifact_manager
.get_temp_file(config
.METADATA_STORE
),
201 artifact_manager
.get_temp_file(config
.METADATA_INDEX_TABLE
),
204 metadata_clean_db
= MetadataDatabase(
205 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
206 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
210 self
.warnings
= False
212 # A map {author : clean_author} for those known (to avoid
213 # repeating warnings):
216 for id in metadata_db
.iterkeys():
217 metadata
= metadata_db
[id]
219 # Record the original author name because it might be needed for
220 # expanding CVS keywords:
221 metadata
.original_author
= metadata
.author
223 self
._clean
_metadata
(metadata
)
225 metadata_clean_db
[id] = metadata
229 'There were warnings converting author names and/or log messages\n'
230 'to Unicode (see messages above). Please restart this pass\n'
231 'with one or more \'--encoding\' parameters or with\n'
232 '\'--fallback-encoding\'.'
235 metadata_clean_db
.close()
240 class CollateSymbolsPass(Pass
):
241 """Divide symbols into branches, tags, and excludes."""
247 ExcludedSymbol
: 'exclude',
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  config.SYMBOL_DB is registered with self._register_temp_file();
  config.PROJECTS and config.SYMBOL_STATISTICS are registered with
  self._register_temp_file_needed()."""

  self._register_temp_file(config.SYMBOL_DB)

  for needed_name in (config.PROJECTS, config.SYMBOL_STATISTICS):
    self._register_temp_file_needed(needed_name)
256 def get_symbol(self
, run_options
, stats
):
257 """Use StrategyRules to decide what to do with a symbol.
259 STATS is an instance of symbol_statistics._Stats describing an
260 instance of Symbol or Trunk. To determine how the symbol is to be
261 converted, consult the StrategyRules in the project's
262 symbol_strategy_rules. Each rule is allowed a chance to change
263 the way the symbol will be converted. If the symbol is not a
264 Trunk or TypedSymbol after all rules have run, raise
265 IndeterminateSymbolException."""
268 rules
= run_options
.project_symbol_strategy_rules
[symbol
.project
.id]
270 symbol
= rule
.get_symbol(symbol
, stats
)
271 assert symbol
is not None
273 stats
.check_valid(symbol
)
277 def log_symbol_summary(self
, stats
, symbol
):
278 if not self
.symbol_info_file
:
281 if isinstance(symbol
, Trunk
):
283 preferred_parent_name
= '.'
285 name
= stats
.lod
.name
286 if symbol
.preferred_parent_id
is None:
287 preferred_parent_name
= '.'
289 preferred_parent
= self
.symbol_stats
[symbol
.preferred_parent_id
].lod
290 if isinstance(preferred_parent
, Trunk
):
291 preferred_parent_name
= '.trunk.'
293 preferred_parent_name
= preferred_parent
.name
295 if isinstance(symbol
, LineOfDevelopment
) and symbol
.base_path
:
296 symbol_path
= symbol
.base_path
300 self
.symbol_info_file
.write(
301 '%-5d %-30s %-10s %s %s\n' % (
302 stats
.lod
.project
.id,
304 self
.conversion_names
[symbol
.__class
__],
306 preferred_parent_name
,
309 self
.symbol_info_file
.write(' # %s\n' % (stats
,))
310 parent_counts
= stats
.possible_parents
.items()
312 self
.symbol_info_file
.write(' # Possible parents:\n')
313 parent_counts
.sort(lambda a
,b
: cmp((b
[1], a
[0]), (a
[1], b
[0])))
314 for (pp
, count
) in parent_counts
:
315 if isinstance(pp
, Trunk
):
316 self
.symbol_info_file
.write(
317 ' # .trunk. : %d\n' % (count
,)
320 self
.symbol_info_file
.write(
321 ' # %s : %d\n' % (pp
.name
, count
,)
324 def get_symbols(self
, run_options
):
325 """Return a map telling how to convert symbols.
327 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
328 indicating how each symbol should be converted. Trunk objects in
329 SYMBOL_STATS are passed through unchanged. One object is included
330 in the return value for each line of development described in
333 Raise FatalError if there was an error."""
338 if Ctx().symbol_info_filename
is not None:
339 self
.symbol_info_file
= open(Ctx().symbol_info_filename
, 'w')
340 self
.symbol_info_file
.write(
341 '# Columns: project_id symbol_name conversion symbol_path '
342 'preferred_parent_name\n'
345 self
.symbol_info_file
= None
347 # Initialize each symbol strategy rule a single time, even if it
348 # is used in more than one project. First define a map from
349 # object id to symbol strategy rule:
351 for rule_list
in run_options
.project_symbol_strategy_rules
:
352 for rule
in rule_list
:
353 rules
[id(rule
)] = rule
355 for rule
in rules
.itervalues():
356 rule
.start(self
.symbol_stats
)
360 for stats
in self
.symbol_stats
:
362 symbol
= self
.get_symbol(run_options
, stats
)
363 except IndeterminateSymbolException
, e
:
364 self
.log_symbol_summary(stats
, stats
.lod
)
365 mismatches
.append(e
.stats
)
366 except SymbolPlanError
, e
:
367 self
.log_symbol_summary(stats
, stats
.lod
)
370 self
.log_symbol_summary(stats
, symbol
)
371 retval
[stats
.lod
] = symbol
373 for rule
in rules
.itervalues():
376 if self
.symbol_info_file
:
377 self
.symbol_info_file
.close()
379 del self
.symbol_info_file
381 if errors
or mismatches
:
382 s
= ['Problems determining how symbols should be converted:\n']
384 s
.append('%s\n' % (e
,))
387 'It is not clear how the following symbols '
388 'should be converted.\n'
389 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
391 '--symbol-default to resolve the ambiguity.\n'
393 for stats
in mismatches
:
394 s
.append(' %s\n' % (stats
,))
395 raise FatalError(''.join(s
))
399 def run(self
, run_options
, stats_keeper
):
400 Ctx()._projects
= read_projects(
401 artifact_manager
.get_temp_file(config
.PROJECTS
)
403 self
.symbol_stats
= SymbolStatistics(
404 artifact_manager
.get_temp_file(config
.SYMBOL_STATISTICS
)
407 symbol_map
= self
.get_symbols(run_options
)
409 # Check the symbols for consistency and bail out if there were errors:
410 self
.symbol_stats
.check_consistency(symbol_map
)
412 # Check that the symbols all have SVN paths set and that the paths
414 Ctx().output_option
.check_symbols(symbol_map
)
416 for symbol
in symbol_map
.itervalues():
417 if isinstance(symbol
, ExcludedSymbol
):
418 self
.symbol_stats
.exclude_symbol(symbol
)
420 create_symbol_database(symbol_map
.values())
422 del self
.symbol_stats
427 class FilterSymbolsPass(Pass
):
428 """Delete any branches/tags that are to be excluded.
430 Also delete revisions on excluded branches, and delete other
431 references to the excluded symbols."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The summary serializer and the two summary datafiles are registered
  with self._register_temp_file(); the project, symbol, CVS file and
  CVS item files are registered with
  self._register_temp_file_needed().  Finally the revision excluder
  obtained from Ctx() registers its own artifacts against this pass."""

  registered_names = (
      config.SUMMARY_SERIALIZER,
      config.CVS_REVS_SUMMARY_DATAFILE,
      config.CVS_SYMBOLS_SUMMARY_DATAFILE,
      )
  needed_names = (
      config.PROJECTS,
      config.SYMBOL_DB,
      config.CVS_FILES_DB,
      config.CVS_ITEMS_STORE,
      )

  for registered_name in registered_names:
    self._register_temp_file(registered_name)
  for needed_name in needed_names:
    self._register_temp_file_needed(needed_name)

  excluder = Ctx().revision_excluder
  excluder.register_artifacts(self)
443 def run(self
, run_options
, stats_keeper
):
444 Ctx()._projects
= read_projects(
445 artifact_manager
.get_temp_file(config
.PROJECTS
)
447 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_READ
)
448 Ctx()._symbol
_db
= SymbolDatabase()
449 cvs_item_store
= OldCVSItemStore(
450 artifact_manager
.get_temp_file(config
.CVS_ITEMS_STORE
))
452 cvs_item_serializer
= PrimedPickleSerializer(cvs_item_primer
)
453 f
= open(artifact_manager
.get_temp_file(config
.SUMMARY_SERIALIZER
), 'wb')
454 cPickle
.dump(cvs_item_serializer
, f
, -1)
457 rev_db
= NewSortableCVSRevisionDatabase(
458 artifact_manager
.get_temp_file(config
.CVS_REVS_SUMMARY_DATAFILE
),
462 symbol_db
= NewSortableCVSSymbolDatabase(
463 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_SUMMARY_DATAFILE
),
467 revision_excluder
= Ctx().revision_excluder
469 Log().quiet("Filtering out excluded symbols and summarizing items...")
471 stats_keeper
.reset_cvs_rev_info()
472 revision_excluder
.start()
474 # Process the cvs items store one file at a time:
475 for cvs_file_items
in cvs_item_store
.iter_cvs_file_items():
476 Log().verbose(cvs_file_items
.cvs_file
.filename
)
477 cvs_file_items
.filter_excluded_symbols(revision_excluder
)
478 cvs_file_items
.mutate_symbols()
479 cvs_file_items
.adjust_parents()
480 cvs_file_items
.refine_symbols()
481 cvs_file_items
.record_opened_symbols()
482 cvs_file_items
.record_closed_symbols()
483 cvs_file_items
.check_link_consistency()
485 # Store whatever is left to the new file and update statistics:
486 stats_keeper
.record_cvs_file(cvs_file_items
.cvs_file
)
487 for cvs_item
in cvs_file_items
.values():
488 stats_keeper
.record_cvs_item(cvs_item
)
490 if isinstance(cvs_item
, CVSRevision
):
492 elif isinstance(cvs_item
, CVSSymbol
):
493 symbol_db
.add(cvs_item
)
495 stats_keeper
.set_stats_reflect_exclude(True)
499 revision_excluder
.finish()
500 cvs_item_store
.close()
501 Ctx()._symbol
_db
.close()
502 Ctx()._cvs
_file
_db
.close()
507 class SortRevisionSummaryPass(Pass
):
508 """Sort the revision summary file."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The sorted revision summary datafile is registered with
  self._register_temp_file(); the unsorted one is registered with
  self._register_temp_file_needed()."""

  sorted_datafile = config.CVS_REVS_SUMMARY_SORTED_DATAFILE
  self._register_temp_file(sorted_datafile)

  unsorted_datafile = config.CVS_REVS_SUMMARY_DATAFILE
  self._register_temp_file_needed(unsorted_datafile)
514 def run(self
, run_options
, stats_keeper
):
515 Log().quiet("Sorting CVS revision summaries...")
517 artifact_manager
.get_temp_file(config
.CVS_REVS_SUMMARY_DATAFILE
),
518 artifact_manager
.get_temp_file(
519 config
.CVS_REVS_SUMMARY_SORTED_DATAFILE
))
523 class SortSymbolSummaryPass(Pass
):
524 """Sort the symbol summary file."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The sorted symbol summary datafile is registered with
  self._register_temp_file(); the unsorted one is registered with
  self._register_temp_file_needed()."""

  sorted_datafile = config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
  self._register_temp_file(sorted_datafile)

  unsorted_datafile = config.CVS_SYMBOLS_SUMMARY_DATAFILE
  self._register_temp_file_needed(unsorted_datafile)
530 def run(self
, run_options
, stats_keeper
):
531 Log().quiet("Sorting CVS symbol summaries...")
533 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_SUMMARY_DATAFILE
),
534 artifact_manager
.get_temp_file(
535 config
.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
))
539 class InitializeChangesetsPass(Pass
):
540 """Create preliminary CommitSets."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The changeset store/index, the item-to-changeset table and the
  sorted CVS item store/index are registered with
  self._register_temp_file().  The project, symbol and CVS file
  databases, the summary serializer and both sorted summary datafiles
  are registered with self._register_temp_file_needed()."""

  registered_names = (
      config.CVS_ITEM_TO_CHANGESET,
      config.CHANGESETS_STORE,
      config.CHANGESETS_INDEX,
      config.CVS_ITEMS_SORTED_STORE,
      config.CVS_ITEMS_SORTED_INDEX_TABLE,
      )
  needed_names = (
      config.PROJECTS,
      config.SYMBOL_DB,
      config.CVS_FILES_DB,
      config.SUMMARY_SERIALIZER,
      config.CVS_REVS_SUMMARY_SORTED_DATAFILE,
      config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE,
      )

  for registered_name in registered_names:
    self._register_temp_file(registered_name)
  for needed_name in needed_names:
    self._register_temp_file_needed(needed_name)
556 def get_revision_changesets(self
):
557 """Generate revision changesets, one at a time.
559 Each time, yield a list of CVSRevisions that might potentially
560 constitute a changeset."""
562 # Create changesets for CVSRevisions:
563 old_metadata_id
= None
567 db
= OldSortableCVSRevisionDatabase(
568 artifact_manager
.get_temp_file(
569 config
.CVS_REVS_SUMMARY_SORTED_DATAFILE
571 self
.cvs_item_serializer
,
575 if cvs_rev
.metadata_id
!= old_metadata_id \
576 or cvs_rev
.timestamp
> old_timestamp
+ config
.COMMIT_THRESHOLD
:
577 # Start a new changeset. First finish up the old changeset,
580 yield changeset_items
582 old_metadata_id
= cvs_rev
.metadata_id
583 changeset_items
.append(cvs_rev
)
584 old_timestamp
= cvs_rev
.timestamp
586 # Finish up the last changeset, if any:
588 yield changeset_items
590 def get_symbol_changesets(self
):
591 """Generate symbol changesets, one at a time.
593 Each time, yield a list of CVSSymbols that might potentially
594 constitute a changeset."""
599 db
= OldSortableCVSSymbolDatabase(
600 artifact_manager
.get_temp_file(
601 config
.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
603 self
.cvs_item_serializer
,
606 for cvs_symbol
in db
:
607 if cvs_symbol
.symbol
.id != old_symbol_id
:
608 # Start a new changeset. First finish up the old changeset,
611 yield changeset_items
613 old_symbol_id
= cvs_symbol
.symbol
.id
614 changeset_items
.append(cvs_symbol
)
616 # Finish up the last changeset, if any:
618 yield changeset_items
621 def compare_items(a
, b
):
623 cmp(a
.timestamp
, b
.timestamp
)
624 or cmp(a
.cvs_file
.cvs_path
, b
.cvs_file
.cvs_path
)
625 or cmp([int(x
) for x
in a
.rev
.split('.')],
626 [int(x
) for x
in b
.rev
.split('.')])
629 def break_internal_dependencies(self
, changeset_items
):
630 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
632 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
633 belong in a single RevisionChangeset, but there might be internal
634 dependencies among the items. Return a list of lists, where each
635 sublist is a list of CVSRevisions and at least one internal
636 dependency has been eliminated. Iff CHANGESET_ITEMS does not have
637 to be split, then the return value will contain a single value,
638 namely the original value of CHANGESET_ITEMS. Split
639 CHANGESET_ITEMS at most once, even though the resulting changesets
640 might themselves have internal dependencies."""
642 # We only look for succ dependencies, since by doing so we
643 # automatically cover pred dependencies as well. First create a
644 # list of tuples (pred, succ) of id pairs for CVSItems that depend
647 changeset_cvs_item_ids
= set([cvs_rev
.id for cvs_rev
in changeset_items
])
648 for cvs_item
in changeset_items
:
649 for next_id
in cvs_item
.get_succ_ids():
650 if next_id
in changeset_cvs_item_ids
:
651 # Sanity check: a CVSItem should never depend on itself:
652 if next_id
== cvs_item
.id:
653 raise InternalError('Item depends on itself: %s' % (cvs_item
,))
655 dependencies
.append((cvs_item
.id, next_id
,))
658 # Sort the changeset_items in a defined order (chronological to the
659 # extent that the timestamps are correct and unique).
660 changeset_items
.sort(self
.compare_items
)
662 for (i
, changeset_item
) in enumerate(changeset_items
):
663 indexes
[changeset_item
.id] = i
664 # How many internal dependencies would be broken by breaking the
665 # Changeset after a particular index?
666 breaks
= [0] * len(changeset_items
)
667 for (pred
, succ
,) in dependencies
:
668 pred_index
= indexes
[pred
]
669 succ_index
= indexes
[succ
]
670 breaks
[min(pred_index
, succ_index
)] += 1
671 breaks
[max(pred_index
, succ_index
)] -= 1
675 for i
in range(1, len(breaks
)):
676 breaks
[i
] += breaks
[i
- 1]
677 for i
in range(0, len(breaks
) - 1):
678 if breaks
[i
] > best_count
:
680 best_count
= breaks
[i
]
681 best_time
= (changeset_items
[i
+ 1].timestamp
682 - changeset_items
[i
].timestamp
)
683 elif breaks
[i
] == best_count \
684 and (changeset_items
[i
+ 1].timestamp
685 - changeset_items
[i
].timestamp
) < best_time
:
687 best_count
= breaks
[i
]
688 best_time
= (changeset_items
[i
+ 1].timestamp
689 - changeset_items
[i
].timestamp
)
690 # Reuse the old changeset.id for the first of the split changesets.
691 return [changeset_items
[:best_i
+ 1], changeset_items
[best_i
+ 1:]]
693 return [changeset_items
]
695 def break_all_internal_dependencies(self
, changeset_items
):
696 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
698 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
699 be part of a single changeset. Break this list into sublists,
700 where the CVSRevisions in each sublist are free of mutual
703 # This method is written non-recursively to avoid any possible
704 # problems with recursion depth.
706 changesets_to_split
= [changeset_items
]
707 while changesets_to_split
:
708 changesets
= self
.break_internal_dependencies(changesets_to_split
.pop())
709 if len(changesets
) == 1:
710 [changeset_items
] = changesets
711 yield changeset_items
713 # The changeset had to be split; see if either of the
714 # fragments have to be split:
716 changesets_to_split
.extend(changesets
)
718 def get_changesets(self
):
719 """Generate (Changeset, [CVSItem,...]) for all changesets.
721 The Changesets already have their internal dependencies broken.
722 The [CVSItem,...] list is the list of CVSItems in the
723 corresponding Changeset."""
725 for changeset_items
in self
.get_revision_changesets():
726 for split_changeset_items \
727 in self
.break_all_internal_dependencies(changeset_items
):
730 self
.changeset_key_generator
.gen_id(),
731 [cvs_rev
.id for cvs_rev
in split_changeset_items
]
733 split_changeset_items
,
736 for changeset_items
in self
.get_symbol_changesets():
738 create_symbol_changeset(
739 self
.changeset_key_generator
.gen_id(),
740 changeset_items
[0].symbol
,
741 [cvs_symbol
.id for cvs_symbol
in changeset_items
]
746 def run(self
, run_options
, stats_keeper
):
747 Log().quiet("Creating preliminary commit sets...")
749 Ctx()._projects
= read_projects(
750 artifact_manager
.get_temp_file(config
.PROJECTS
)
752 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_READ
)
753 Ctx()._symbol
_db
= SymbolDatabase()
755 f
= open(artifact_manager
.get_temp_file(config
.SUMMARY_SERIALIZER
), 'rb')
756 self
.cvs_item_serializer
= cPickle
.load(f
)
759 changeset_db
= ChangesetDatabase(
760 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
761 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
764 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
765 artifact_manager
.get_temp_file(config
.CVS_ITEM_TO_CHANGESET
),
769 self
.sorted_cvs_items_db
= IndexedCVSItemStore(
770 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
771 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
774 self
.changeset_key_generator
= KeyGenerator()
776 for (changeset
, changeset_items
) in self
.get_changesets():
777 if Log().is_on(Log
.DEBUG
):
778 Log().debug(repr(changeset
))
779 changeset_db
.store(changeset
)
780 for cvs_item
in changeset_items
:
781 self
.sorted_cvs_items_db
.add(cvs_item
)
782 cvs_item_to_changeset_id
[cvs_item
.id] = changeset
.id
784 self
.sorted_cvs_items_db
.close()
785 cvs_item_to_changeset_id
.close()
787 Ctx()._symbol
_db
.close()
788 Ctx()._cvs
_file
_db
.close()
790 del self
.cvs_item_serializer
795 class ProcessedChangesetLogger
:
797 self
.processed_changeset_ids
= []
799 def log(self
, changeset_id
):
800 if Log().is_on(Log
.DEBUG
):
801 self
.processed_changeset_ids
.append(changeset_id
)
804 if self
.processed_changeset_ids
:
806 'Consumed changeset ids %s'
807 % (', '.join(['%x' % id for id in self
.processed_changeset_ids
]),))
809 del self
.processed_changeset_ids
[:]
812 class BreakRevisionChangesetCyclesPass(Pass
):
813 """Break up any dependency cycles involving only RevisionChangesets."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The REVBROKEN changeset store/index and item-to-changeset table are
  registered with self._register_temp_file().  The project, symbol
  and CVS file databases, the sorted CVS item store/index, and the
  original changeset store/index and item-to-changeset table are
  registered with self._register_temp_file_needed()."""

  registered_names = (
      config.CHANGESETS_REVBROKEN_STORE,
      config.CHANGESETS_REVBROKEN_INDEX,
      config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
      )
  needed_names = (
      config.PROJECTS,
      config.SYMBOL_DB,
      config.CVS_FILES_DB,
      config.CVS_ITEMS_SORTED_STORE,
      config.CVS_ITEMS_SORTED_INDEX_TABLE,
      config.CHANGESETS_STORE,
      config.CHANGESETS_INDEX,
      config.CVS_ITEM_TO_CHANGESET,
      )

  for registered_name in registered_names:
    self._register_temp_file(registered_name)
  for needed_name in needed_names:
    self._register_temp_file_needed(needed_name)
828 def get_source_changesets(self
):
829 old_changeset_db
= ChangesetDatabase(
830 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
831 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
834 changeset_ids
= old_changeset_db
.keys()
836 for changeset_id
in changeset_ids
:
837 yield old_changeset_db
[changeset_id
]
839 old_changeset_db
.close()
842 def break_cycle(self
, cycle
):
843 """Break up one or more changesets in CYCLE to help break the cycle.
845 CYCLE is a list of Changesets where
847 cycle[i] depends on cycle[i - 1]
849 Break up one or more changesets in CYCLE to make progress towards
850 breaking the cycle. Update self.changeset_graph accordingly.
852 It is not guaranteed that the cycle will be broken by one call to
853 this routine, but at least some progress must be made."""
855 self
.processed_changeset_logger
.flush()
858 for i
in range(len(cycle
)):
859 # It's OK if this index wraps to -1:
860 link
= ChangesetGraphLink(
861 cycle
[i
- 1], cycle
[i
], cycle
[i
+ 1 - len(cycle
)])
863 if best_i
is None or link
< best_link
:
867 if Log().is_on(Log
.DEBUG
):
869 'Breaking cycle %s by breaking node %x' % (
870 ' -> '.join(['%x' % node
.id for node
in (cycle
+ [cycle
[0]])]),
871 best_link
.changeset
.id,))
873 new_changesets
= best_link
.break_changeset(self
.changeset_key_generator
)
875 self
.changeset_graph
.delete_changeset(best_link
.changeset
)
877 for changeset
in new_changesets
:
878 self
.changeset_graph
.add_new_changeset(changeset
)
880 def run(self
, run_options
, stats_keeper
):
881 Log().quiet("Breaking revision changeset dependency cycles...")
883 Ctx()._projects
= read_projects(
884 artifact_manager
.get_temp_file(config
.PROJECTS
)
886 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_READ
)
887 Ctx()._symbol
_db
= SymbolDatabase()
888 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
889 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
890 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
894 artifact_manager
.get_temp_file(
895 config
.CVS_ITEM_TO_CHANGESET
),
896 artifact_manager
.get_temp_file(
897 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
))
898 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
899 artifact_manager
.get_temp_file(
900 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
),
903 changeset_db
= ChangesetDatabase(
904 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
905 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
908 self
.changeset_graph
= ChangesetGraph(
909 changeset_db
, cvs_item_to_changeset_id
913 for changeset
in self
.get_source_changesets():
914 changeset_db
.store(changeset
)
915 if isinstance(changeset
, RevisionChangeset
):
916 self
.changeset_graph
.add_changeset(changeset
)
917 max_changeset_id
= max(max_changeset_id
, changeset
.id)
919 self
.changeset_key_generator
= KeyGenerator(max_changeset_id
+ 1)
921 self
.processed_changeset_logger
= ProcessedChangesetLogger()
923 # Consume the graph, breaking cycles using self.break_cycle():
924 for (changeset
, time_range
) in self
.changeset_graph
.consume_graph(
925 cycle_breaker
=self
.break_cycle
927 self
.processed_changeset_logger
.log(changeset
.id)
929 self
.processed_changeset_logger
.flush()
930 del self
.processed_changeset_logger
932 self
.changeset_graph
.close()
933 self
.changeset_graph
= None
934 Ctx()._cvs
_items
_db
.close()
935 Ctx()._symbol
_db
.close()
936 Ctx()._cvs
_file
_db
.close()
941 class RevisionTopologicalSortPass(Pass
):
942 """Sort RevisionChangesets into commit order.
944 Also convert them to OrderedChangesets, without changing their ids."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The REVSORTED changeset store/index are registered with
  self._register_temp_file().  The project, symbol and CVS file
  databases, the sorted CVS item store/index, and the REVBROKEN
  changeset store/index and item-to-changeset table are registered
  with self._register_temp_file_needed()."""

  registered_names = (
      config.CHANGESETS_REVSORTED_STORE,
      config.CHANGESETS_REVSORTED_INDEX,
      )
  needed_names = (
      config.PROJECTS,
      config.SYMBOL_DB,
      config.CVS_FILES_DB,
      config.CVS_ITEMS_SORTED_STORE,
      config.CVS_ITEMS_SORTED_INDEX_TABLE,
      config.CHANGESETS_REVBROKEN_STORE,
      config.CHANGESETS_REVBROKEN_INDEX,
      config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
      )

  for registered_name in registered_names:
    self._register_temp_file(registered_name)
  for needed_name in needed_names:
    self._register_temp_file_needed(needed_name)
958 def get_source_changesets(self
, changeset_db
):
959 changeset_ids
= changeset_db
.keys()
961 for changeset_id
in changeset_ids
:
962 yield changeset_db
[changeset_id
]
964 def get_changesets(self
):
965 changeset_db
= ChangesetDatabase(
966 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
967 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
971 changeset_graph
= ChangesetGraph(
973 CVSItemToChangesetTable(
974 artifact_manager
.get_temp_file(
975 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
981 for changeset
in self
.get_source_changesets(changeset_db
):
982 if isinstance(changeset
, RevisionChangeset
):
983 changeset_graph
.add_changeset(changeset
)
990 changeset_ids
.append(None)
992 for (changeset
, time_range
) in changeset_graph
.consume_graph():
993 changeset_ids
.append(changeset
.id)
996 changeset_ids
.append(None)
998 for i
in range(1, len(changeset_ids
) - 1):
999 changeset
= changeset_db
[changeset_ids
[i
]]
1000 yield OrderedChangeset(
1001 changeset
.id, changeset
.cvs_item_ids
, i
- 1,
1002 changeset_ids
[i
- 1], changeset_ids
[i
+ 1])
1004 changeset_graph
.close()
1006 def run(self
, run_options
, stats_keeper
):
1007 Log().quiet("Generating CVSRevisions in commit order...")
1009 Ctx()._projects
= read_projects(
1010 artifact_manager
.get_temp_file(config
.PROJECTS
)
1012 Ctx()._cvs
_file
_db
= CVSFileDatabase(DB_OPEN_READ
)
1013 Ctx()._symbol
_db
= SymbolDatabase()
1014 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1015 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1016 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1019 changesets_revordered_db
= ChangesetDatabase(
1020 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_STORE
),
1021 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_INDEX
),
1024 for changeset
in self
.get_changesets():
1025 changesets_revordered_db
.store(changeset
)
1027 changesets_revordered_db
.close()
1028 Ctx()._cvs
_items
_db
.close()
1029 Ctx()._symbol
_db
.close()
1030 Ctx()._cvs
_file
_db
.close()
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  # NOTE(review): this block was rebuilt from a copy that had lost its
  # short lines.  The database open modes, the shutil.copyfile() call
  # that seeds the SYMBROKEN table, the Log().debug() opener and the
  # closing "Done" message were restored -- confirm them against the
  # project history.

  def register_artifacts(self):
    """Declare the temp files that this pass writes and reads."""

    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    # Inputs produced by earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Generate every changeset in the REVSORTED changeset database.

    The database is closed once the generator has been exhausted."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

      cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Pick the link that compares lowest among all nodes of the cycle.
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_link is None or link < best_link:
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    # Replace the broken changeset in the graph by its fragments.
    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    """Copy all changesets to the SYMBROKEN store, breaking symbol cycles.

    Every source changeset is stored in the new database, but only
    SymbolChangesets are added to the graph.  The graph is then
    consumed with self.break_cycle() as the cycle breaker.
    RUN_OPTIONS and STATS_KEEPER are not used by this pass."""

    # Imported locally so that this method does not depend on the
    # module-level import list.
    import shutil

    Log().quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Start from a copy of the previous item->changeset table and
    # update the copy, leaving the REVBROKEN table untouched.
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      # Track the largest id of *any* changeset so that newly generated
      # ids cannot collide with existing ones.
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  # NOTE(review): this block was rebuilt from a copy that had lost its
  # short lines.  Restored: the database open modes, the
  # shutil.copyfile() call, the initialisers of self.ordinals /
  # ordinal_limits / early_item_ids / late_item_ids, the list
  # comprehension heads and True/False returns in
  # _split_if_retrograde(), the "if path: ... else:" and "break" in
  # run(), "del self.ordinals", and the closing "Done" message --
  # confirm them against the project history.

  def register_artifacts(self):
    """Declare the temp files that this pass writes and reads."""

    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    # Inputs produced by earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Generate every changeset in the SYMBROKEN changeset database.

    The database is closed once the generator has been exhausted."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets.

    The items of CHANGESET are divided into an "early" and a "late"
    group, a new changeset is created for each group, and both replace
    CHANGESET in self.changeset_graph.  The late changeset is then
    re-checked (and split again if necessary)."""

    Log().debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      # Neighbours whose changeset has no ordinal do not constrain the
      # limits: they fall back to 0 / sys.maxint respectively.
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Items whose predecessors all come before the earliest successor
    # go into the early changeset; the rest go into the late one.
    early_item_ids = []
    late_item_ids = []
    for (cvs_item_id, (max_pred_ordinal, min_succ_ordinal)) \
            in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(cvs_item_id)
      else:
        early_item_ids.append(cvs_item_id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split the changeset with id CHANGESET_ID if it is retrograde.

    The changeset is treated as retrograde if the largest ordinal
    among its ordered predecessors is greater than or equal to the
    smallest ordinal among its ordered successors.  Return True if the
    changeset was split, False otherwise."""

    node = self.changeset_graph[changeset_id]
    # Only neighbours that have an entry in self.ordinals take part.
    pred_ordinals = [
        self.ordinals[pred_id]
        for pred_id in node.pred_ids
        if pred_id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[succ_id]
        for succ_id in node.succ_ids
        if succ_id in self.ordinals
        ]
    succ_ordinals.sort()
    if pred_ordinals and succ_ordinals \
           and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    # Pick the link that compares lowest among the interior nodes.
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_link is None or link < best_link:
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    # Replace the broken changeset in the graph by its fragments.
    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

      cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    """Copy all changesets to the ALLBROKEN store, breaking all cycles.

    First every BranchChangeset that is individually retrograde is
    split.  Then the graph is consumed: whenever no more nodes without
    predecessors are available, either a path leading to the next
    unprocessed ordered changeset is broken, or, if there is no such
    path, a generic cycle is broken.  RUN_OPTIONS and STATS_KEEPER are
    not used by this pass."""

    # Imported locally so that this method does not depend on the
    # module-level import list.
    import shutil

    Log().quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Start from a copy of the previous item->changeset table and
    # update the copy, leaving the SYMBROKEN table untouched.
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal.  (This
    # raises KeyError if the ordinals are not exactly 0..N-1.)
    ordered_changesets = [
        ordered_changeset_map[ordinal]
        for ordinal in range(len(ordered_changeset_map))
        ]

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # them:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    # The ordinals are only consulted by the retrograde scan above.
    del self.ordinals

    # Index into ordered_changesets of the first ordered changeset
    # that has not been consumed yet.
    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      ordered_id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(
          ordered_id, ordered_changeset_ids)
      if path:
        if Log().is_on(Log.DEBUG):
          Log().debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if Log().is_on(Log.DEBUG):
          Log().debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[ordered_id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(ordered_id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    Log().quiet("Done")
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  # NOTE(review): this block was rebuilt from a copy that had lost its
  # short lines.  The database open modes, the 'w' mode of the output
  # file and the closing "Done" message were restored -- confirm them
  # against the project history.

  def register_artifacts(self):
    """Declare the temp files that this pass writes and reads."""

    # Output of this pass:
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    # Inputs produced by earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order.

    All changesets from the ALLBROKEN database are added to a
    ChangesetGraph, which is then consumed.  The timestamp of each
    changeset is obtained from a Timestamper, which is given the upper
    bound of the changeset's time range and a flag telling whether the
    changeset is a SymbolChangeset."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            ),
        )
    # Remember which ids belong to SymbolChangesets; consume_graph()
    # is only asked for (changeset, time_range) below.
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    """Write one "<id> <timestamp>" line per changeset, in commit order.

    Both numbers are written in hexadecimal; the timestamp is
    zero-padded to at least eight digits.  RUN_OPTIONS and
    STATS_KEEPER are not used by this pass."""

    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')
    # Close the output file even if sorting fails part-way through.
    try:
      for (changeset, timestamp) in self.get_changesets():
        sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))
    finally:
      sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  # NOTE(review): this block was rebuilt from a copy that had lost its
  # short lines.  Restored: the database open modes, the loop header
  # that reads the sorted datafile, "yield svn_commit", the log level
  # of the "Creating Subversion r%d" message (Log().normal), "del
  # creator" and the closing "Done" message -- confirm them against
  # the project history.

  def register_artifacts(self):
    """Declare the temp files that this pass writes and reads."""

    # Outputs of this pass:
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    # Inputs produced by earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order.

    The order and the timestamps are read from
    CHANGESETS_SORTED_DATAFILE, each line of which holds a changeset
    id and a timestamp as whitespace-separated hexadecimal numbers.
    The changesets themselves are loaded from the ALLBROKEN
    database."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(
            config.CHANGESETS_SORTED_DATAFILE))
    for line in sorted_changesets:
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    # Release the input file explicitly rather than leaving it to the
    # garbage collector.
    sorted_changesets.close()
    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order.

    CREATOR is asked to process each changeset in turn (via
    process_changeset(changeset, timestamp)); every SVNCommit that it
    produces is passed on."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT.

    For an SVNRevisionCommit, the path and revision of each of its
    CVS revisions are additionally logged at verbose level."""

    Log().normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    """Create the SVNCommits and store them via a PersistenceManager.

    The id of the last revision number generated is recorded in
    STATS_KEEPER.  RUN_OPTIONS is not used by this pass."""

    Log().quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
    del creator

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")
class SortSymbolsPass(Pass):
  """Sort the symbol openings/closings file.

  This pass was formerly known as pass6."""

  # NOTE(review): this block was rebuilt from a copy that had lost its
  # short lines.  The name of the sorting helper (sort_file, which the
  # module imports from cvs2svn_lib.sort) and the closing "Done"
  # message were restored -- confirm them against the project history.

  def register_artifacts(self):
    """Declare the temp files that this pass writes and reads."""

    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    """Sort SYMBOL_OPENINGS_CLOSINGS into SYMBOL_OPENINGS_CLOSINGS_SORTED.

    RUN_OPTIONS and STATS_KEEPER are not used by this pass."""

    Log().quiet("Sorting symbolic name source revisions...")

    # presumably sort(1)-style key options: field 1, then field 2
    # numerically, then field 3 onwards -- verify against sort_file().
    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        options=['-k', '1,1', '-k', '2,2n', '-k', '3'],
        )
    Log().quiet("Done")
class IndexSymbolsPass(Pass):
  """Record where each symbol starts in the sorted openings/closings file.

  This pass was formerly known as pass7."""

  # NOTE(review): this block was rebuilt from a copy that had lost its
  # short lines.  In generate_offsets_for_symbolings() only the input
  # file name, the line.split(" ", 2) call, the Log().verbose() call
  # and the cPickle.dump() call were visible; the tell()/readline()
  # loop around them and the base-16 parse of the symbol id were
  # restored -- confirm them against the project history.

  def register_artifacts(self):
    """Declare the temp files that this pass writes and reads."""

    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need.

    The mapping is keyed by symbol id and is pickled to
    SYMBOL_OFFSETS_DB."""

    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    try:
      old_id = None
      while True:
        # Record the position *before* reading, so that it is the
        # offset of the start of the line.  readline() is used rather
        # than iterating over the file so that tell() stays accurate.
        fpos = f.tell()
        line = f.readline()
        if not line:
          break
        id_field, svn_revnum, ignored = line.split(" ", 2)
        # NOTE(review): ids are assumed to be written in hexadecimal.
        symbol_id = int(id_field, 16)
        if symbol_id != old_id:
          # First line for this symbol: remember where it starts.
          Log().verbose(' ', Ctx()._symbol_db.get_symbol(symbol_id).name)
          old_id = symbol_id
          offsets[symbol_id] = fpos
    finally:
      f.close()

    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    try:
      # Protocol -1 selects the highest pickle protocol available.
      cPickle.dump(offsets, offsets_db, -1)
    finally:
      offsets_db.close()

  def run(self, run_options, stats_keeper):
    """Build the symbol offsets database.

    RUN_OPTIONS and STATS_KEEPER are not used by this pass."""

    Log().quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    Log().quiet("Done.")
class OutputPass(Pass):
  """Send the stored SVNCommits to the configured output option.

  This pass was formerly known as pass8."""

  # NOTE(review): this block was rebuilt from a copy that had lost its
  # short lines.  The database open modes and the revision counter
  # loop ("svn_revnum = 1", "while svn_commit:", "svn_revnum += 1")
  # were restored -- confirm them against the project history.

  def register_artifacts(self):
    """Declare the temp files that this pass reads.

    The output option is also given the chance to register the
    artifacts that it needs."""

    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    """Output every stored SVNCommit, in revision-number order.

    Commits are fetched from the persistence manager by consecutive
    revision number until a lookup returns a false value.  The
    revision count recorded in STATS_KEEPER is handed to the output
    option's setup().  RUN_OPTIONS is not used by this pass."""

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # NOTE(review): numbering is assumed to start at revision 1.
    svn_revnum = 1
    svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
    while svn_commit:
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1
      svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)

    Ctx().output_option.cleanup()
    Ctx()._persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_file_db.close()
1740 # The list of passes constituting a run of cvs2svn:
1743 CleanMetadataPass(),
1744 CollateSymbolsPass(),
1745 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1746 FilterSymbolsPass(),
1747 SortRevisionSummaryPass(),
1748 SortSymbolSummaryPass(),
1749 InitializeChangesetsPass(),
1750 #CheckIndexedItemStoreDependenciesPass(
1751 # config.CVS_ITEMS_SORTED_STORE,
1752 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1753 BreakRevisionChangesetCyclesPass(),
1754 RevisionTopologicalSortPass(),
1755 BreakSymbolChangesetCyclesPass(),
1756 BreakAllChangesetCyclesPass(),
1757 TopologicalSortPass(),