1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
24 from cvs2svn_lib
import config
25 from cvs2svn_lib
.context
import Ctx
26 from cvs2svn_lib
.common
import warning_prefix
27 from cvs2svn_lib
.common
import FatalException
28 from cvs2svn_lib
.common
import FatalError
29 from cvs2svn_lib
.common
import InternalError
30 from cvs2svn_lib
.common
import DB_OPEN_NEW
31 from cvs2svn_lib
.common
import DB_OPEN_READ
32 from cvs2svn_lib
.common
import DB_OPEN_WRITE
33 from cvs2svn_lib
.common
import Timestamper
34 from cvs2svn_lib
.sort
import sort_file
35 from cvs2svn_lib
.log
import Log
36 from cvs2svn_lib
.pass_manager
import Pass
37 from cvs2svn_lib
.serializer
import PrimedPickleSerializer
38 from cvs2svn_lib
.artifact_manager
import artifact_manager
39 from cvs2svn_lib
.cvs_path_database
import CVSPathDatabase
40 from cvs2svn_lib
.metadata_database
import MetadataDatabase
41 from cvs2svn_lib
.project
import read_projects
42 from cvs2svn_lib
.project
import write_projects
43 from cvs2svn_lib
.symbol
import LineOfDevelopment
44 from cvs2svn_lib
.symbol
import Trunk
45 from cvs2svn_lib
.symbol
import Symbol
46 from cvs2svn_lib
.symbol
import Branch
47 from cvs2svn_lib
.symbol
import Tag
48 from cvs2svn_lib
.symbol
import ExcludedSymbol
49 from cvs2svn_lib
.symbol_database
import SymbolDatabase
50 from cvs2svn_lib
.symbol_database
import create_symbol_database
51 from cvs2svn_lib
.symbol_statistics
import SymbolPlanError
52 from cvs2svn_lib
.symbol_statistics
import IndeterminateSymbolException
53 from cvs2svn_lib
.symbol_statistics
import SymbolStatistics
54 from cvs2svn_lib
.cvs_item
import CVSRevision
55 from cvs2svn_lib
.cvs_item
import CVSSymbol
56 from cvs2svn_lib
.cvs_item_database
import OldCVSItemStore
57 from cvs2svn_lib
.cvs_item_database
import IndexedCVSItemStore
58 from cvs2svn_lib
.cvs_item_database
import cvs_item_primer
59 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib
.key_generator
import KeyGenerator
64 from cvs2svn_lib
.changeset
import RevisionChangeset
65 from cvs2svn_lib
.changeset
import OrderedChangeset
66 from cvs2svn_lib
.changeset
import SymbolChangeset
67 from cvs2svn_lib
.changeset
import BranchChangeset
68 from cvs2svn_lib
.changeset
import create_symbol_changeset
69 from cvs2svn_lib
.changeset_graph
import ChangesetGraph
70 from cvs2svn_lib
.changeset_graph_link
import ChangesetGraphLink
71 from cvs2svn_lib
.changeset_database
import ChangesetDatabase
72 from cvs2svn_lib
.changeset_database
import CVSItemToChangesetTable
73 from cvs2svn_lib
.svn_commit
import SVNRevisionCommit
74 from cvs2svn_lib
.openings_closings
import SymbolingsLogger
75 from cvs2svn_lib
.svn_commit_creator
import SVNCommitCreator
76 from cvs2svn_lib
.persistence_manager
import PersistenceManager
77 from cvs2svn_lib
.collect_data
import CollectData
78 from cvs2svn_lib
.check_dependencies_pass \
79 import CheckItemStoreDependenciesPass
80 from cvs2svn_lib
.check_dependencies_pass \
81 import CheckIndexedItemStoreDependenciesPass
84 class CollectRevsPass(Pass
):
85 """This pass was formerly known as pass1."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  Every config name listed below is handed, in order, to
  self._register_temp_file().  (Presumably these are the files that
  this pass creates -- confirm against the Pass base class.)"""

  temp_file_names = (
      config.PROJECTS,
      config.SYMBOL_STATISTICS,
      config.METADATA_INDEX_TABLE,
      config.METADATA_STORE,
      config.CVS_PATHS_DB,
      config.CVS_ITEMS_STORE,
      )
  for temp_file_name in temp_file_names:
    self._register_temp_file(temp_file_name)
95 def run(self
, run_options
, stats_keeper
):
96 Log().quiet("Examining all CVS ',v' files...")
98 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_NEW
)
99 cd
= CollectData(stats_keeper
)
100 for project
in run_options
.projects
:
101 cd
.process_project(project
)
102 run_options
.projects
= None
104 fatal_errors
= cd
.close()
107 raise FatalException("Pass 1 complete.\n"
110 + "\n".join(fatal_errors
) + "\n"
111 + "Exited due to fatal error(s).")
113 Ctx()._cvs
_path
_db
.close()
114 write_projects(artifact_manager
.get_temp_file(config
.PROJECTS
))
118 class CleanMetadataPass(Pass
):
119 """Clean up CVS revision metadata and write it to a new database."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The 'clean' metadata files go through self._register_temp_file();
  the original metadata files go through
  self._register_temp_file_needed().  (Presumably: outputs of this
  pass vs. inputs required from an earlier pass -- confirm against the
  Pass base class.)"""

  for temp_file_name in (
      config.METADATA_CLEAN_INDEX_TABLE,
      config.METADATA_CLEAN_STORE,
      ):
    self._register_temp_file(temp_file_name)

  for needed_file_name in (
      config.METADATA_INDEX_TABLE,
      config.METADATA_STORE,
      ):
    self._register_temp_file_needed(needed_file_name)
127 def _get_clean_author(self
, author
):
128 """Return AUTHOR, converted appropriately to UTF8.
130 Raise a UnicodeException if it cannot be converted using the
131 configured cvs_author_decoder."""
134 return self
._authors
[author
]
139 clean_author
= Ctx().cvs_author_decoder(author
)
141 self
._authors
[author
] = author
142 raise UnicodeError('Problem decoding author \'%s\'' % (author
,))
145 clean_author
= clean_author
.encode('utf8')
147 self
._authors
[author
] = author
148 raise UnicodeError('Problem encoding author \'%s\'' % (author
,))
150 self
._authors
[author
] = clean_author
153 def _get_clean_log_msg(self
, log_msg
):
154 """Return LOG_MSG, converted appropriately to UTF8.
156 Raise a UnicodeException if it cannot be converted using the
157 configured cvs_log_decoder."""
160 clean_log_msg
= Ctx().cvs_log_decoder(log_msg
)
163 'Problem decoding log message:\n'
167 % ('-' * 75, log_msg
, '-' * 75,)
171 return clean_log_msg
.encode('utf8')
174 'Problem encoding log message:\n'
178 % ('-' * 75, log_msg
, '-' * 75,)
181 def _clean_metadata(self
, metadata
):
182 """Clean up METADATA by overwriting its members as necessary."""
185 metadata
.author
= self
._get
_clean
_author
(metadata
.author
)
186 except UnicodeError, e
:
187 Log().warn('%s: %s' % (warning_prefix
, e
,))
191 metadata
.log_msg
= self
._get
_clean
_log
_msg
(metadata
.log_msg
)
192 except UnicodeError, e
:
193 Log().warn('%s: %s' % (warning_prefix
, e
,))
196 def run(self
, run_options
, stats_keeper
):
197 Log().quiet("Converting metadata to UTF8...")
198 metadata_db
= MetadataDatabase(
199 artifact_manager
.get_temp_file(config
.METADATA_STORE
),
200 artifact_manager
.get_temp_file(config
.METADATA_INDEX_TABLE
),
203 metadata_clean_db
= MetadataDatabase(
204 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
205 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
209 self
.warnings
= False
211 # A map {author : clean_author} for those known (to avoid
212 # repeating warnings):
215 for id in metadata_db
.iterkeys():
216 metadata
= metadata_db
[id]
218 # Record the original author name because it might be needed for
219 # expanding CVS keywords:
220 metadata
.original_author
= metadata
.author
222 self
._clean
_metadata
(metadata
)
224 metadata_clean_db
[id] = metadata
228 'There were warnings converting author names and/or log messages\n'
229 'to Unicode (see messages above). Please restart this pass\n'
230 'with one or more \'--encoding\' parameters or with\n'
231 '\'--fallback-encoding\'.'
234 metadata_clean_db
.close()
239 class CollateSymbolsPass(Pass
):
240 """Divide symbols into branches, tags, and excludes."""
246 ExcludedSymbol
: 'exclude',
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  config.SYMBOL_DB goes through self._register_temp_file(); the
  projects and symbol statistics files go through
  self._register_temp_file_needed().  (Presumably: output of this pass
  vs. inputs required from earlier passes -- confirm against the Pass
  base class.)"""

  self._register_temp_file(config.SYMBOL_DB)

  for needed_file_name in (config.PROJECTS, config.SYMBOL_STATISTICS):
    self._register_temp_file_needed(needed_file_name)
255 def get_symbol(self
, run_options
, stats
):
256 """Use StrategyRules to decide what to do with a symbol.
258 STATS is an instance of symbol_statistics._Stats describing an
259 instance of Symbol or Trunk. To determine how the symbol is to be
260 converted, consult the StrategyRules in the project's
261 symbol_strategy_rules. Each rule is allowed a chance to change
262 the way the symbol will be converted. If the symbol is not a
263 Trunk or TypedSymbol after all rules have run, raise
264 IndeterminateSymbolException."""
267 rules
= run_options
.project_symbol_strategy_rules
[symbol
.project
.id]
269 symbol
= rule
.get_symbol(symbol
, stats
)
270 assert symbol
is not None
272 stats
.check_valid(symbol
)
276 def log_symbol_summary(self
, stats
, symbol
):
277 if not self
.symbol_info_file
:
280 if isinstance(symbol
, Trunk
):
282 preferred_parent_name
= '.'
284 name
= stats
.lod
.name
285 if symbol
.preferred_parent_id
is None:
286 preferred_parent_name
= '.'
288 preferred_parent
= self
.symbol_stats
[symbol
.preferred_parent_id
].lod
289 if isinstance(preferred_parent
, Trunk
):
290 preferred_parent_name
= '.trunk.'
292 preferred_parent_name
= preferred_parent
.name
294 if isinstance(symbol
, LineOfDevelopment
) and symbol
.base_path
:
295 symbol_path
= symbol
.base_path
299 self
.symbol_info_file
.write(
300 '%-5d %-30s %-10s %s %s\n' % (
301 stats
.lod
.project
.id,
303 self
.conversion_names
[symbol
.__class
__],
305 preferred_parent_name
,
308 self
.symbol_info_file
.write(' # %s\n' % (stats
,))
309 parent_counts
= stats
.possible_parents
.items()
311 self
.symbol_info_file
.write(' # Possible parents:\n')
312 parent_counts
.sort(lambda a
,b
: cmp((b
[1], a
[0]), (a
[1], b
[0])))
313 for (pp
, count
) in parent_counts
:
314 if isinstance(pp
, Trunk
):
315 self
.symbol_info_file
.write(
316 ' # .trunk. : %d\n' % (count
,)
319 self
.symbol_info_file
.write(
320 ' # %s : %d\n' % (pp
.name
, count
,)
323 def get_symbols(self
, run_options
):
324 """Return a map telling how to convert symbols.
326 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
327 indicating how each symbol should be converted. Trunk objects in
328 SYMBOL_STATS are passed through unchanged. One object is included
329 in the return value for each line of development described in
332 Raise FatalError if there was an error."""
337 if Ctx().symbol_info_filename
is not None:
338 self
.symbol_info_file
= open(Ctx().symbol_info_filename
, 'w')
339 self
.symbol_info_file
.write(
340 '# Columns: project_id symbol_name conversion symbol_path '
341 'preferred_parent_name\n'
344 self
.symbol_info_file
= None
346 # Initialize each symbol strategy rule a single time, even if it
347 # is used in more than one project. First define a map from
348 # object id to symbol strategy rule:
350 for rule_list
in run_options
.project_symbol_strategy_rules
:
351 for rule
in rule_list
:
352 rules
[id(rule
)] = rule
354 for rule
in rules
.itervalues():
355 rule
.start(self
.symbol_stats
)
359 for stats
in self
.symbol_stats
:
361 symbol
= self
.get_symbol(run_options
, stats
)
362 except IndeterminateSymbolException
, e
:
363 self
.log_symbol_summary(stats
, stats
.lod
)
364 mismatches
.append(e
.stats
)
365 except SymbolPlanError
, e
:
366 self
.log_symbol_summary(stats
, stats
.lod
)
369 self
.log_symbol_summary(stats
, symbol
)
370 retval
[stats
.lod
] = symbol
372 for rule
in rules
.itervalues():
375 if self
.symbol_info_file
:
376 self
.symbol_info_file
.close()
378 del self
.symbol_info_file
380 if errors
or mismatches
:
381 s
= ['Problems determining how symbols should be converted:\n']
383 s
.append('%s\n' % (e
,))
386 'It is not clear how the following symbols '
387 'should be converted.\n'
388 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
390 '--symbol-default to resolve the ambiguity.\n'
392 for stats
in mismatches
:
393 s
.append(' %s\n' % (stats
,))
394 raise FatalError(''.join(s
))
398 def run(self
, run_options
, stats_keeper
):
399 Ctx()._projects
= read_projects(
400 artifact_manager
.get_temp_file(config
.PROJECTS
)
402 self
.symbol_stats
= SymbolStatistics(
403 artifact_manager
.get_temp_file(config
.SYMBOL_STATISTICS
)
406 symbol_map
= self
.get_symbols(run_options
)
408 # Check the symbols for consistency and bail out if there were errors:
409 self
.symbol_stats
.check_consistency(symbol_map
)
411 # Check that the symbols all have SVN paths set and that the paths
413 Ctx().output_option
.check_symbols(symbol_map
)
415 for symbol
in symbol_map
.itervalues():
416 if isinstance(symbol
, ExcludedSymbol
):
417 self
.symbol_stats
.exclude_symbol(symbol
)
419 create_symbol_database(symbol_map
.values())
421 del self
.symbol_stats
426 class FilterSymbolsPass(Pass
):
427 """Delete any branches/tags that are to be excluded.
429 Also delete revisions on excluded branches, and delete other
430 references to the excluded symbols."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The first group of config names goes through
  self._register_temp_file(); the second group goes through
  self._register_temp_file_needed().  (Presumably: outputs of this
  pass vs. inputs required from earlier passes -- confirm against the
  Pass base class.)  Finally the configured revision collector is
  asked to register its own artifacts against this pass."""

  for temp_file_name in (
      config.ITEM_SERIALIZER,
      config.CVS_REVS_DATAFILE,
      config.CVS_SYMBOLS_DATAFILE,
      ):
    self._register_temp_file(temp_file_name)

  for needed_file_name in (
      config.PROJECTS,
      config.SYMBOL_DB,
      config.CVS_PATHS_DB,
      config.CVS_ITEMS_STORE,
      ):
    self._register_temp_file_needed(needed_file_name)

  # The revision collector registers whatever it needs, passing this
  # pass object as the argument:
  collector = Ctx().revision_collector
  collector.register_artifacts(self)
442 def run(self
, run_options
, stats_keeper
):
443 Ctx()._projects
= read_projects(
444 artifact_manager
.get_temp_file(config
.PROJECTS
)
446 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
447 Ctx()._symbol
_db
= SymbolDatabase()
448 cvs_item_store
= OldCVSItemStore(
449 artifact_manager
.get_temp_file(config
.CVS_ITEMS_STORE
))
451 cvs_item_serializer
= PrimedPickleSerializer(cvs_item_primer
)
452 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'wb')
453 cPickle
.dump(cvs_item_serializer
, f
, -1)
456 rev_db
= NewSortableCVSRevisionDatabase(
457 artifact_manager
.get_temp_file(config
.CVS_REVS_DATAFILE
),
461 symbol_db
= NewSortableCVSSymbolDatabase(
462 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_DATAFILE
),
466 revision_collector
= Ctx().revision_collector
468 Log().quiet("Filtering out excluded symbols and summarizing items...")
470 stats_keeper
.reset_cvs_rev_info()
471 revision_collector
.start()
473 # Process the cvs items store one file at a time:
474 for cvs_file_items
in cvs_item_store
.iter_cvs_file_items():
475 Log().verbose(cvs_file_items
.cvs_file
.filename
)
476 cvs_file_items
.filter_excluded_symbols()
477 cvs_file_items
.mutate_symbols()
478 cvs_file_items
.adjust_parents()
479 cvs_file_items
.refine_symbols()
480 cvs_file_items
.determine_revision_properties(
481 Ctx().revision_property_setters
483 cvs_file_items
.record_opened_symbols()
484 cvs_file_items
.record_closed_symbols()
485 cvs_file_items
.check_link_consistency()
487 # Give the revision collector a chance to collect data about the
489 revision_collector
.process_file(cvs_file_items
)
491 # Store whatever is left to the new file and update statistics:
492 stats_keeper
.record_cvs_file(cvs_file_items
.cvs_file
)
493 for cvs_item
in cvs_file_items
.values():
494 stats_keeper
.record_cvs_item(cvs_item
)
496 if isinstance(cvs_item
, CVSRevision
):
498 elif isinstance(cvs_item
, CVSSymbol
):
499 symbol_db
.add(cvs_item
)
501 stats_keeper
.set_stats_reflect_exclude(True)
505 revision_collector
.finish()
506 cvs_item_store
.close()
507 Ctx()._symbol
_db
.close()
508 Ctx()._cvs
_path
_db
.close()
513 class SortRevisionsPass(Pass
):
514 """Sort the revisions file."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The sorted revisions datafile goes through
  self._register_temp_file(); the unsorted revisions datafile goes
  through self._register_temp_file_needed().  (Presumably: output of
  this pass vs. input required from an earlier pass -- confirm against
  the Pass base class.)"""

  sorted_name = config.CVS_REVS_SORTED_DATAFILE
  self._register_temp_file(sorted_name)

  unsorted_name = config.CVS_REVS_DATAFILE
  self._register_temp_file_needed(unsorted_name)
520 def run(self
, run_options
, stats_keeper
):
521 Log().quiet("Sorting CVS revision summaries...")
523 artifact_manager
.get_temp_file(config
.CVS_REVS_DATAFILE
),
524 artifact_manager
.get_temp_file(
525 config
.CVS_REVS_SORTED_DATAFILE
527 tempdirs
=[Ctx().tmpdir
],
532 class SortSymbolsPass(Pass
):
533 """Sort the symbols file."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The sorted symbols datafile goes through
  self._register_temp_file(); the unsorted symbols datafile goes
  through self._register_temp_file_needed().  (Presumably: output of
  this pass vs. input required from an earlier pass -- confirm against
  the Pass base class.)"""

  sorted_name = config.CVS_SYMBOLS_SORTED_DATAFILE
  self._register_temp_file(sorted_name)

  unsorted_name = config.CVS_SYMBOLS_DATAFILE
  self._register_temp_file_needed(unsorted_name)
539 def run(self
, run_options
, stats_keeper
):
540 Log().quiet("Sorting CVS symbol summaries...")
542 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_DATAFILE
),
543 artifact_manager
.get_temp_file(
544 config
.CVS_SYMBOLS_SORTED_DATAFILE
546 tempdirs
=[Ctx().tmpdir
],
551 class InitializeChangesetsPass(Pass
):
552 """Create preliminary CommitSets."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The first group of config names goes through
  self._register_temp_file(); the second group goes through
  self._register_temp_file_needed().  (Presumably: outputs of this
  pass vs. inputs required from earlier passes -- confirm against the
  Pass base class.)"""

  for temp_file_name in (
      config.CVS_ITEM_TO_CHANGESET,
      config.CHANGESETS_STORE,
      config.CHANGESETS_INDEX,
      config.CVS_ITEMS_SORTED_STORE,
      config.CVS_ITEMS_SORTED_INDEX_TABLE,
      ):
    self._register_temp_file(temp_file_name)

  for needed_file_name in (
      config.PROJECTS,
      config.SYMBOL_DB,
      config.CVS_PATHS_DB,
      config.ITEM_SERIALIZER,
      config.CVS_REVS_SORTED_DATAFILE,
      config.CVS_SYMBOLS_SORTED_DATAFILE,
      ):
    self._register_temp_file_needed(needed_file_name)
568 def get_revision_changesets(self
):
569 """Generate revision changesets, one at a time.
571 Each time, yield a list of CVSRevisions that might potentially
572 consititute a changeset."""
574 # Create changesets for CVSRevisions:
575 old_metadata_id
= None
579 db
= OldSortableCVSRevisionDatabase(
580 artifact_manager
.get_temp_file(
581 config
.CVS_REVS_SORTED_DATAFILE
583 self
.cvs_item_serializer
,
587 if cvs_rev
.metadata_id
!= old_metadata_id \
588 or cvs_rev
.timestamp
> old_timestamp
+ config
.COMMIT_THRESHOLD
:
589 # Start a new changeset. First finish up the old changeset,
592 yield changeset_items
594 old_metadata_id
= cvs_rev
.metadata_id
595 changeset_items
.append(cvs_rev
)
596 old_timestamp
= cvs_rev
.timestamp
598 # Finish up the last changeset, if any:
600 yield changeset_items
602 def get_symbol_changesets(self
):
603 """Generate symbol changesets, one at a time.
605 Each time, yield a list of CVSSymbols that might potentially
606 consititute a changeset."""
611 db
= OldSortableCVSSymbolDatabase(
612 artifact_manager
.get_temp_file(
613 config
.CVS_SYMBOLS_SORTED_DATAFILE
615 self
.cvs_item_serializer
,
618 for cvs_symbol
in db
:
619 if cvs_symbol
.symbol
.id != old_symbol_id
:
620 # Start a new changeset. First finish up the old changeset,
623 yield changeset_items
625 old_symbol_id
= cvs_symbol
.symbol
.id
626 changeset_items
.append(cvs_symbol
)
628 # Finish up the last changeset, if any:
630 yield changeset_items
633 def compare_items(a
, b
):
635 cmp(a
.timestamp
, b
.timestamp
)
636 or cmp(a
.cvs_file
.cvs_path
, b
.cvs_file
.cvs_path
)
637 or cmp([int(x
) for x
in a
.rev
.split('.')],
638 [int(x
) for x
in b
.rev
.split('.')])
641 def break_internal_dependencies(self
, changeset_items
):
642 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
644 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
645 belong in a single RevisionChangeset, but there might be internal
646 dependencies among the items. Return a list of lists, where each
647 sublist is a list of CVSRevisions and at least one internal
648 dependency has been eliminated. Iff CHANGESET_ITEMS does not have
649 to be split, then the return value will contain a single value,
650 namely the original value of CHANGESET_ITEMS. Split
651 CHANGESET_ITEMS at most once, even though the resulting changesets
652 might themselves have internal dependencies."""
654 # We only look for succ dependencies, since by doing so we
655 # automatically cover pred dependencies as well. First create a
656 # list of tuples (pred, succ) of id pairs for CVSItems that depend
659 changeset_cvs_item_ids
= set([cvs_rev
.id for cvs_rev
in changeset_items
])
660 for cvs_item
in changeset_items
:
661 for next_id
in cvs_item
.get_succ_ids():
662 if next_id
in changeset_cvs_item_ids
:
663 # Sanity check: a CVSItem should never depend on itself:
664 if next_id
== cvs_item
.id:
665 raise InternalError('Item depends on itself: %s' % (cvs_item
,))
667 dependencies
.append((cvs_item
.id, next_id
,))
670 # Sort the changeset_items in a defined order (chronological to the
671 # extent that the timestamps are correct and unique).
672 changeset_items
.sort(self
.compare_items
)
674 for (i
, changeset_item
) in enumerate(changeset_items
):
675 indexes
[changeset_item
.id] = i
676 # How many internal dependencies would be broken by breaking the
677 # Changeset after a particular index?
678 breaks
= [0] * len(changeset_items
)
679 for (pred
, succ
,) in dependencies
:
680 pred_index
= indexes
[pred
]
681 succ_index
= indexes
[succ
]
682 breaks
[min(pred_index
, succ_index
)] += 1
683 breaks
[max(pred_index
, succ_index
)] -= 1
687 for i
in range(1, len(breaks
)):
688 breaks
[i
] += breaks
[i
- 1]
689 for i
in range(0, len(breaks
) - 1):
690 if breaks
[i
] > best_count
:
692 best_count
= breaks
[i
]
693 best_time
= (changeset_items
[i
+ 1].timestamp
694 - changeset_items
[i
].timestamp
)
695 elif breaks
[i
] == best_count \
696 and (changeset_items
[i
+ 1].timestamp
697 - changeset_items
[i
].timestamp
) < best_time
:
699 best_count
= breaks
[i
]
700 best_time
= (changeset_items
[i
+ 1].timestamp
701 - changeset_items
[i
].timestamp
)
702 # Reuse the old changeset.id for the first of the split changesets.
703 return [changeset_items
[:best_i
+ 1], changeset_items
[best_i
+ 1:]]
705 return [changeset_items
]
707 def break_all_internal_dependencies(self
, changeset_items
):
708 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
710 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
711 be part of a single changeset. Break this list into sublists,
712 where the CVSRevisions in each sublist are free of mutual
715 # This method is written non-recursively to avoid any possible
716 # problems with recursion depth.
718 changesets_to_split
= [changeset_items
]
719 while changesets_to_split
:
720 changesets
= self
.break_internal_dependencies(changesets_to_split
.pop())
721 if len(changesets
) == 1:
722 [changeset_items
] = changesets
723 yield changeset_items
725 # The changeset had to be split; see if either of the
726 # fragments have to be split:
728 changesets_to_split
.extend(changesets
)
730 def get_changesets(self
):
731 """Generate (Changeset, [CVSItem,...]) for all changesets.
733 The Changesets already have their internal dependencies broken.
734 The [CVSItem,...] list is the list of CVSItems in the
735 corresponding Changeset."""
737 for changeset_items
in self
.get_revision_changesets():
738 for split_changeset_items \
739 in self
.break_all_internal_dependencies(changeset_items
):
742 self
.changeset_key_generator
.gen_id(),
743 [cvs_rev
.id for cvs_rev
in split_changeset_items
]
745 split_changeset_items
,
748 for changeset_items
in self
.get_symbol_changesets():
750 create_symbol_changeset(
751 self
.changeset_key_generator
.gen_id(),
752 changeset_items
[0].symbol
,
753 [cvs_symbol
.id for cvs_symbol
in changeset_items
]
758 def run(self
, run_options
, stats_keeper
):
759 Log().quiet("Creating preliminary commit sets...")
761 Ctx()._projects
= read_projects(
762 artifact_manager
.get_temp_file(config
.PROJECTS
)
764 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
765 Ctx()._symbol
_db
= SymbolDatabase()
767 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'rb')
768 self
.cvs_item_serializer
= cPickle
.load(f
)
771 changeset_db
= ChangesetDatabase(
772 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
773 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
776 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
777 artifact_manager
.get_temp_file(config
.CVS_ITEM_TO_CHANGESET
),
781 self
.sorted_cvs_items_db
= IndexedCVSItemStore(
782 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
783 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
786 self
.changeset_key_generator
= KeyGenerator()
788 for (changeset
, changeset_items
) in self
.get_changesets():
789 if Log().is_on(Log
.DEBUG
):
790 Log().debug(repr(changeset
))
791 changeset_db
.store(changeset
)
792 for cvs_item
in changeset_items
:
793 self
.sorted_cvs_items_db
.add(cvs_item
)
794 cvs_item_to_changeset_id
[cvs_item
.id] = changeset
.id
796 self
.sorted_cvs_items_db
.close()
797 cvs_item_to_changeset_id
.close()
799 Ctx()._symbol
_db
.close()
800 Ctx()._cvs
_path
_db
.close()
802 del self
.cvs_item_serializer
807 class ProcessedChangesetLogger
:
809 self
.processed_changeset_ids
= []
def log(self, changeset_id):
  """Append CHANGESET_ID to self.processed_changeset_ids.

  Nothing is recorded unless the Log.DEBUG level is switched on."""

  if not Log().is_on(Log.DEBUG):
    return

  self.processed_changeset_ids.append(changeset_id)
816 if self
.processed_changeset_ids
:
818 'Consumed changeset ids %s'
819 % (', '.join(['%x' % id for id in self
.processed_changeset_ids
]),))
821 del self
.processed_changeset_ids
[:]
824 class BreakRevisionChangesetCyclesPass(Pass
):
825 """Break up any dependency cycles involving only RevisionChangesets."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The REVBROKEN files go through self._register_temp_file(); the
  remaining config names go through self._register_temp_file_needed().
  (Presumably: outputs of this pass vs. inputs required from earlier
  passes -- confirm against the Pass base class.)"""

  for temp_file_name in (
      config.CHANGESETS_REVBROKEN_STORE,
      config.CHANGESETS_REVBROKEN_INDEX,
      config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
      ):
    self._register_temp_file(temp_file_name)

  for needed_file_name in (
      config.PROJECTS,
      config.SYMBOL_DB,
      config.CVS_PATHS_DB,
      config.CVS_ITEMS_SORTED_STORE,
      config.CVS_ITEMS_SORTED_INDEX_TABLE,
      config.CHANGESETS_STORE,
      config.CHANGESETS_INDEX,
      config.CVS_ITEM_TO_CHANGESET,
      ):
    self._register_temp_file_needed(needed_file_name)
840 def get_source_changesets(self
):
841 old_changeset_db
= ChangesetDatabase(
842 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
843 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
846 changeset_ids
= old_changeset_db
.keys()
848 for changeset_id
in changeset_ids
:
849 yield old_changeset_db
[changeset_id
]
851 old_changeset_db
.close()
854 def break_cycle(self
, cycle
):
855 """Break up one or more changesets in CYCLE to help break the cycle.
857 CYCLE is a list of Changesets where
859 cycle[i] depends on cycle[i - 1]
861 Break up one or more changesets in CYCLE to make progress towards
862 breaking the cycle. Update self.changeset_graph accordingly.
864 It is not guaranteed that the cycle will be broken by one call to
865 this routine, but at least some progress must be made."""
867 self
.processed_changeset_logger
.flush()
870 for i
in range(len(cycle
)):
871 # It's OK if this index wraps to -1:
872 link
= ChangesetGraphLink(
873 cycle
[i
- 1], cycle
[i
], cycle
[i
+ 1 - len(cycle
)])
875 if best_i
is None or link
< best_link
:
879 if Log().is_on(Log
.DEBUG
):
881 'Breaking cycle %s by breaking node %x' % (
882 ' -> '.join(['%x' % node
.id for node
in (cycle
+ [cycle
[0]])]),
883 best_link
.changeset
.id,))
885 new_changesets
= best_link
.break_changeset(self
.changeset_key_generator
)
887 self
.changeset_graph
.delete_changeset(best_link
.changeset
)
889 for changeset
in new_changesets
:
890 self
.changeset_graph
.add_new_changeset(changeset
)
892 def run(self
, run_options
, stats_keeper
):
893 Log().quiet("Breaking revision changeset dependency cycles...")
895 Ctx()._projects
= read_projects(
896 artifact_manager
.get_temp_file(config
.PROJECTS
)
898 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
899 Ctx()._symbol
_db
= SymbolDatabase()
900 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
901 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
902 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
906 artifact_manager
.get_temp_file(
907 config
.CVS_ITEM_TO_CHANGESET
),
908 artifact_manager
.get_temp_file(
909 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
))
910 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
911 artifact_manager
.get_temp_file(
912 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
),
915 changeset_db
= ChangesetDatabase(
916 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
917 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
920 self
.changeset_graph
= ChangesetGraph(
921 changeset_db
, cvs_item_to_changeset_id
925 for changeset
in self
.get_source_changesets():
926 changeset_db
.store(changeset
)
927 if isinstance(changeset
, RevisionChangeset
):
928 self
.changeset_graph
.add_changeset(changeset
)
929 max_changeset_id
= max(max_changeset_id
, changeset
.id)
931 self
.changeset_key_generator
= KeyGenerator(max_changeset_id
+ 1)
933 self
.processed_changeset_logger
= ProcessedChangesetLogger()
935 # Consume the graph, breaking cycles using self.break_cycle():
936 for (changeset
, time_range
) in self
.changeset_graph
.consume_graph(
937 cycle_breaker
=self
.break_cycle
939 self
.processed_changeset_logger
.log(changeset
.id)
941 self
.processed_changeset_logger
.flush()
942 del self
.processed_changeset_logger
944 self
.changeset_graph
.close()
945 self
.changeset_graph
= None
946 Ctx()._cvs
_items
_db
.close()
947 Ctx()._symbol
_db
.close()
948 Ctx()._cvs
_path
_db
.close()
953 class RevisionTopologicalSortPass(Pass
):
954 """Sort RevisionChangesets into commit order.
956 Also convert them to OrderedChangesets, without changing their ids."""
def register_artifacts(self):
  """Register the temporary files associated with this pass.

  The REVSORTED files go through self._register_temp_file(); the
  remaining config names go through self._register_temp_file_needed().
  (Presumably: outputs of this pass vs. inputs required from earlier
  passes -- confirm against the Pass base class.)"""

  for temp_file_name in (
      config.CHANGESETS_REVSORTED_STORE,
      config.CHANGESETS_REVSORTED_INDEX,
      ):
    self._register_temp_file(temp_file_name)

  for needed_file_name in (
      config.PROJECTS,
      config.SYMBOL_DB,
      config.CVS_PATHS_DB,
      config.CVS_ITEMS_SORTED_STORE,
      config.CVS_ITEMS_SORTED_INDEX_TABLE,
      config.CHANGESETS_REVBROKEN_STORE,
      config.CHANGESETS_REVBROKEN_INDEX,
      config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
      ):
    self._register_temp_file_needed(needed_file_name)
970 def get_source_changesets(self
, changeset_db
):
971 changeset_ids
= changeset_db
.keys()
973 for changeset_id
in changeset_ids
:
974 yield changeset_db
[changeset_id
]
976 def get_changesets(self
):
977 changeset_db
= ChangesetDatabase(
978 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
979 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
983 changeset_graph
= ChangesetGraph(
985 CVSItemToChangesetTable(
986 artifact_manager
.get_temp_file(
987 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
993 for changeset
in self
.get_source_changesets(changeset_db
):
994 if isinstance(changeset
, RevisionChangeset
):
995 changeset_graph
.add_changeset(changeset
)
1002 changeset_ids
.append(None)
1004 for (changeset
, time_range
) in changeset_graph
.consume_graph():
1005 changeset_ids
.append(changeset
.id)
1008 changeset_ids
.append(None)
1010 for i
in range(1, len(changeset_ids
) - 1):
1011 changeset
= changeset_db
[changeset_ids
[i
]]
1012 yield OrderedChangeset(
1013 changeset
.id, changeset
.cvs_item_ids
, i
- 1,
1014 changeset_ids
[i
- 1], changeset_ids
[i
+ 1])
1016 changeset_graph
.close()
1018 def run(self
, run_options
, stats_keeper
):
1019 Log().quiet("Generating CVSRevisions in commit order...")
1021 Ctx()._projects
= read_projects(
1022 artifact_manager
.get_temp_file(config
.PROJECTS
)
1024 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
1025 Ctx()._symbol
_db
= SymbolDatabase()
1026 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1027 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1028 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1031 changesets_revordered_db
= ChangesetDatabase(
1032 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_STORE
),
1033 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_INDEX
),
1036 for changeset
in self
.get_changesets():
1037 changesets_revordered_db
.store(changeset
)
1039 changesets_revordered_db
.close()
1040 Ctx()._cvs
_items
_db
.close()
1041 Ctx()._symbol
_db
.close()
1042 Ctx()._cvs
_path
_db
.close()
1047 class BreakSymbolChangesetCyclesPass(Pass
):
1048 """Break up any dependency cycles involving only SymbolChangesets."""
def register_artifacts(self):
    """Register the temporary files used while breaking symbol cycles.

    The SYMBROKEN changeset store, index and item-to-changeset table are
    registered with _register_temp_file(); the project, symbol, path,
    sorted-item, REVSORTED and REVBROKEN files are registered with
    _register_temp_file_needed()."""
    # Files handed to _register_temp_file(), in registration order:
    registered = (
        config.CHANGESETS_SYMBROKEN_STORE,
        config.CHANGESETS_SYMBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_SYMBROKEN,
    )
    # Files handed to _register_temp_file_needed(), in registration order:
    needed = (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_REVSORTED_STORE,
        config.CHANGESETS_REVSORTED_INDEX,
        config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
    )

    for artifact_name in registered:
        self._register_temp_file(artifact_name)
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
1063 def get_source_changesets(self
):
1064 old_changeset_db
= ChangesetDatabase(
1065 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_STORE
),
1066 artifact_manager
.get_temp_file(config
.CHANGESETS_REVSORTED_INDEX
),
1069 changeset_ids
= old_changeset_db
.keys()
1071 for changeset_id
in changeset_ids
:
1072 yield old_changeset_db
[changeset_id
]
1074 old_changeset_db
.close()
1076 def break_cycle(self
, cycle
):
1077 """Break up one or more changesets in CYCLE to help break the cycle.
1079 CYCLE is a list of Changesets where
1081 cycle[i] depends on cycle[i - 1]
1083 Break up one or more changesets in CYCLE to make progress towards
1084 breaking the cycle. Update self.changeset_graph accordingly.
1086 It is not guaranteed that the cycle will be broken by one call to
1087 this routine, but at least some progress must be made."""
1089 self
.processed_changeset_logger
.flush()
1092 for i
in range(len(cycle
)):
1093 # It's OK if this index wraps to -1:
1094 link
= ChangesetGraphLink(
1095 cycle
[i
- 1], cycle
[i
], cycle
[i
+ 1 - len(cycle
)])
1097 if best_i
is None or link
< best_link
:
1101 if Log().is_on(Log
.DEBUG
):
1103 'Breaking cycle %s by breaking node %x' % (
1104 ' -> '.join(['%x' % node
.id for node
in (cycle
+ [cycle
[0]])]),
1105 best_link
.changeset
.id,))
1107 new_changesets
= best_link
.break_changeset(self
.changeset_key_generator
)
1109 self
.changeset_graph
.delete_changeset(best_link
.changeset
)
1111 for changeset
in new_changesets
:
1112 self
.changeset_graph
.add_new_changeset(changeset
)
1114 def run(self
, run_options
, stats_keeper
):
1115 Log().quiet("Breaking symbol changeset dependency cycles...")
1117 Ctx()._projects
= read_projects(
1118 artifact_manager
.get_temp_file(config
.PROJECTS
)
1120 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
1121 Ctx()._symbol
_db
= SymbolDatabase()
1122 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1123 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1124 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1128 artifact_manager
.get_temp_file(
1129 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
),
1130 artifact_manager
.get_temp_file(
1131 config
.CVS_ITEM_TO_CHANGESET_SYMBROKEN
))
1132 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
1133 artifact_manager
.get_temp_file(
1134 config
.CVS_ITEM_TO_CHANGESET_SYMBROKEN
),
1137 changeset_db
= ChangesetDatabase(
1138 artifact_manager
.get_temp_file(config
.CHANGESETS_SYMBROKEN_STORE
),
1139 artifact_manager
.get_temp_file(config
.CHANGESETS_SYMBROKEN_INDEX
),
1142 self
.changeset_graph
= ChangesetGraph(
1143 changeset_db
, cvs_item_to_changeset_id
1146 max_changeset_id
= 0
1147 for changeset
in self
.get_source_changesets():
1148 changeset_db
.store(changeset
)
1149 if isinstance(changeset
, SymbolChangeset
):
1150 self
.changeset_graph
.add_changeset(changeset
)
1151 max_changeset_id
= max(max_changeset_id
, changeset
.id)
1153 self
.changeset_key_generator
= KeyGenerator(max_changeset_id
+ 1)
1155 self
.processed_changeset_logger
= ProcessedChangesetLogger()
1157 # Consume the graph, breaking cycles using self.break_cycle():
1158 for (changeset
, time_range
) in self
.changeset_graph
.consume_graph(
1159 cycle_breaker
=self
.break_cycle
1161 self
.processed_changeset_logger
.log(changeset
.id)
1163 self
.processed_changeset_logger
.flush()
1164 del self
.processed_changeset_logger
1166 self
.changeset_graph
.close()
1167 self
.changeset_graph
= None
1168 Ctx()._cvs
_items
_db
.close()
1169 Ctx()._symbol
_db
.close()
1170 Ctx()._cvs
_path
_db
.close()
1175 class BreakAllChangesetCyclesPass(Pass
):
1176 """Break up any dependency cycles that are closed by SymbolChangesets."""
def register_artifacts(self):
    """Register the temporary files used while breaking all remaining cycles.

    The ALLBROKEN changeset store, index and item-to-changeset table are
    registered with _register_temp_file(); the project, symbol, path,
    sorted-item and SYMBROKEN files are registered with
    _register_temp_file_needed()."""
    # Files handed to _register_temp_file(), in registration order:
    registered = (
        config.CHANGESETS_ALLBROKEN_STORE,
        config.CHANGESETS_ALLBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_ALLBROKEN,
    )
    # Files handed to _register_temp_file_needed(), in registration order:
    needed = (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_SYMBROKEN_STORE,
        config.CHANGESETS_SYMBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_SYMBROKEN,
    )

    for artifact_name in registered:
        self._register_temp_file(artifact_name)
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
1191 def get_source_changesets(self
):
1192 old_changeset_db
= ChangesetDatabase(
1193 artifact_manager
.get_temp_file(config
.CHANGESETS_SYMBROKEN_STORE
),
1194 artifact_manager
.get_temp_file(config
.CHANGESETS_SYMBROKEN_INDEX
),
1197 changeset_ids
= old_changeset_db
.keys()
1199 for changeset_id
in changeset_ids
:
1200 yield old_changeset_db
[changeset_id
]
1202 old_changeset_db
.close()
1204 def _split_retrograde_changeset(self
, changeset
):
1205 """CHANGESET is retrograde. Split it into non-retrograde changesets."""
1207 Log().debug('Breaking retrograde changeset %x' % (changeset
.id,))
1209 self
.changeset_graph
.delete_changeset(changeset
)
1211 # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
1213 for cvs_branch
in changeset
.iter_cvs_items():
1214 max_pred_ordinal
= 0
1215 min_succ_ordinal
= sys
.maxint
1217 for pred_id
in cvs_branch
.get_pred_ids():
1218 pred_ordinal
= self
.ordinals
.get(
1219 self
.cvs_item_to_changeset_id
[pred_id
], 0)
1220 max_pred_ordinal
= max(max_pred_ordinal
, pred_ordinal
)
1222 for succ_id
in cvs_branch
.get_succ_ids():
1223 succ_ordinal
= self
.ordinals
.get(
1224 self
.cvs_item_to_changeset_id
[succ_id
], sys
.maxint
)
1225 min_succ_ordinal
= min(min_succ_ordinal
, succ_ordinal
)
1227 assert max_pred_ordinal
< min_succ_ordinal
1228 ordinal_limits
[cvs_branch
.id] = (max_pred_ordinal
, min_succ_ordinal
,)
1230 # Find the earliest successor ordinal:
1231 min_min_succ_ordinal
= sys
.maxint
1232 for (max_pred_ordinal
, min_succ_ordinal
) in ordinal_limits
.values():
1233 min_min_succ_ordinal
= min(min_min_succ_ordinal
, min_succ_ordinal
)
1237 for (id, (max_pred_ordinal
, min_succ_ordinal
)) in ordinal_limits
.items():
1238 if max_pred_ordinal
>= min_min_succ_ordinal
:
1239 late_item_ids
.append(id)
1241 early_item_ids
.append(id)
1243 assert early_item_ids
1244 assert late_item_ids
1246 early_changeset
= changeset
.create_split_changeset(
1247 self
.changeset_key_generator
.gen_id(), early_item_ids
)
1248 late_changeset
= changeset
.create_split_changeset(
1249 self
.changeset_key_generator
.gen_id(), late_item_ids
)
1251 self
.changeset_graph
.add_new_changeset(early_changeset
)
1252 self
.changeset_graph
.add_new_changeset(late_changeset
)
1254 early_split
= self
._split
_if
_retrograde
(early_changeset
.id)
1256 # Because of the way we constructed it, the early changeset should
1257 # not have to be split:
1258 assert not early_split
1260 self
._split
_if
_retrograde
(late_changeset
.id)
1262 def _split_if_retrograde(self
, changeset_id
):
1263 node
= self
.changeset_graph
[changeset_id
]
1266 for id in node
.pred_ids
1267 if id in self
.ordinals
1269 pred_ordinals
.sort()
1272 for id in node
.succ_ids
1273 if id in self
.ordinals
1275 succ_ordinals
.sort()
1276 if pred_ordinals
and succ_ordinals \
1277 and pred_ordinals
[-1] >= succ_ordinals
[0]:
1278 self
._split
_retrograde
_changeset
(self
.changeset_db
[node
.id])
1283 def break_segment(self
, segment
):
1284 """Break a changeset in SEGMENT[1:-1].
1286 The range SEGMENT[1:-1] is not empty, and all of the changesets in
1287 that range are SymbolChangesets."""
1291 for i
in range(1, len(segment
) - 1):
1292 link
= ChangesetGraphLink(segment
[i
- 1], segment
[i
], segment
[i
+ 1])
1294 if best_i
is None or link
< best_link
:
1298 if Log().is_on(Log
.DEBUG
):
1300 'Breaking segment %s by breaking node %x' % (
1301 ' -> '.join(['%x' % node
.id for node
in segment
]),
1302 best_link
.changeset
.id,))
1304 new_changesets
= best_link
.break_changeset(self
.changeset_key_generator
)
1306 self
.changeset_graph
.delete_changeset(best_link
.changeset
)
1308 for changeset
in new_changesets
:
1309 self
.changeset_graph
.add_new_changeset(changeset
)
1311 def break_cycle(self
, cycle
):
1312 """Break up one or more SymbolChangesets in CYCLE to help break the cycle.
1314 CYCLE is a list of SymbolChangesets where
1316 cycle[i] depends on cycle[i - 1]
1318 . Break up one or more changesets in CYCLE to make progress
1319 towards breaking the cycle. Update self.changeset_graph
1322 It is not guaranteed that the cycle will be broken by one call to
1323 this routine, but at least some progress must be made."""
1325 if Log().is_on(Log
.DEBUG
):
1327 'Breaking cycle %s' % (
1328 ' -> '.join(['%x' % changeset
.id
1329 for changeset
in cycle
+ [cycle
[0]]]),))
1331 # Unwrap the cycle into a segment then break the segment:
1332 self
.break_segment([cycle
[-1]] + cycle
+ [cycle
[0]])
1334 def run(self
, run_options
, stats_keeper
):
1335 Log().quiet("Breaking CVSSymbol dependency loops...")
1337 Ctx()._projects
= read_projects(
1338 artifact_manager
.get_temp_file(config
.PROJECTS
)
1340 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
1341 Ctx()._symbol
_db
= SymbolDatabase()
1342 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1343 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1344 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1348 artifact_manager
.get_temp_file(
1349 config
.CVS_ITEM_TO_CHANGESET_SYMBROKEN
),
1350 artifact_manager
.get_temp_file(
1351 config
.CVS_ITEM_TO_CHANGESET_ALLBROKEN
))
1352 self
.cvs_item_to_changeset_id
= CVSItemToChangesetTable(
1353 artifact_manager
.get_temp_file(
1354 config
.CVS_ITEM_TO_CHANGESET_ALLBROKEN
),
1357 self
.changeset_db
= ChangesetDatabase(
1358 artifact_manager
.get_temp_file(config
.CHANGESETS_ALLBROKEN_STORE
),
1359 artifact_manager
.get_temp_file(config
.CHANGESETS_ALLBROKEN_INDEX
),
1362 self
.changeset_graph
= ChangesetGraph(
1363 self
.changeset_db
, self
.cvs_item_to_changeset_id
1366 # A map {changeset_id : ordinal} for OrderedChangesets:
1368 # A map {ordinal : changeset_id}:
1369 ordered_changeset_map
= {}
1370 # A list of all BranchChangeset ids:
1371 branch_changeset_ids
= []
1372 max_changeset_id
= 0
1373 for changeset
in self
.get_source_changesets():
1374 self
.changeset_db
.store(changeset
)
1375 self
.changeset_graph
.add_changeset(changeset
)
1376 if isinstance(changeset
, OrderedChangeset
):
1377 ordered_changeset_map
[changeset
.ordinal
] = changeset
.id
1378 self
.ordinals
[changeset
.id] = changeset
.ordinal
1379 elif isinstance(changeset
, BranchChangeset
):
1380 branch_changeset_ids
.append(changeset
.id)
1381 max_changeset_id
= max(max_changeset_id
, changeset
.id)
1383 # An array of ordered_changeset ids, indexed by ordinal:
1384 ordered_changesets
= []
1385 for ordinal
in range(len(ordered_changeset_map
)):
1386 id = ordered_changeset_map
[ordinal
]
1387 ordered_changesets
.append(id)
1389 ordered_changeset_ids
= set(ordered_changeset_map
.values())
1390 del ordered_changeset_map
1392 self
.changeset_key_generator
= KeyGenerator(max_changeset_id
+ 1)
1394 # First we scan through all BranchChangesets looking for
1395 # changesets that are individually "retrograde" and splitting
1397 for changeset_id
in branch_changeset_ids
:
1398 self
._split
_if
_retrograde
(changeset_id
)
1402 next_ordered_changeset
= 0
1404 self
.processed_changeset_logger
= ProcessedChangesetLogger()
1406 while self
.changeset_graph
:
1407 # Consume any nodes that don't have predecessors:
1408 for (changeset
, time_range
) \
1409 in self
.changeset_graph
.consume_nopred_nodes():
1410 self
.processed_changeset_logger
.log(changeset
.id)
1411 if changeset
.id in ordered_changeset_ids
:
1412 next_ordered_changeset
+= 1
1413 ordered_changeset_ids
.remove(changeset
.id)
1415 self
.processed_changeset_logger
.flush()
1417 if not self
.changeset_graph
:
1420 # Now work on the next ordered changeset that has not yet been
1421 # processed. BreakSymbolChangesetCyclesPass has broken any
1422 # cycles involving only SymbolChangesets, so the presence of a
1423 # cycle implies that there is at least one ordered changeset
1424 # left in the graph:
1425 assert next_ordered_changeset
< len(ordered_changesets
)
1427 id = ordered_changesets
[next_ordered_changeset
]
1428 path
= self
.changeset_graph
.search_for_path(id, ordered_changeset_ids
)
1430 if Log().is_on(Log
.DEBUG
):
1431 Log().debug('Breaking path from %s to %s' % (path
[0], path
[-1],))
1432 self
.break_segment(path
)
1434 # There were no ordered changesets among the reachable
1435 # predecessors, so do generic cycle-breaking:
1436 if Log().is_on(Log
.DEBUG
):
1438 'Breaking generic cycle found from %s'
1439 % (self
.changeset_db
[id],)
1441 self
.break_cycle(self
.changeset_graph
.find_cycle(id))
1443 del self
.processed_changeset_logger
1444 self
.changeset_graph
.close()
1445 self
.changeset_graph
= None
1446 self
.cvs_item_to_changeset_id
= None
1447 self
.changeset_db
= None
1452 class TopologicalSortPass(Pass
):
1453 """Sort changesets into commit order."""
def register_artifacts(self):
    """Register the temporary files used by the final topological sort.

    The sorted-changesets data file is registered with
    _register_temp_file(); the project, symbol, path, sorted-item and
    ALLBROKEN files are registered with _register_temp_file_needed()."""
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)

    # Files handed to _register_temp_file_needed(), in registration order:
    needed = (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_ALLBROKEN_STORE,
        config.CHANGESETS_ALLBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_ALLBROKEN,
    )
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
def get_source_changesets(self, changeset_db):
    """Generate every changeset held in CHANGESET_DB.

    Each key reported by CHANGESET_DB.keys() is looked up in
    CHANGESET_DB and the resulting value is yielded."""
    all_ids = changeset_db.keys()
    for one_id in all_ids:
        yield changeset_db[one_id]
1470 def get_changesets(self
):
1471 """Generate (changeset, timestamp) pairs in commit order."""
1473 changeset_db
= ChangesetDatabase(
1474 artifact_manager
.get_temp_file(config
.CHANGESETS_ALLBROKEN_STORE
),
1475 artifact_manager
.get_temp_file(config
.CHANGESETS_ALLBROKEN_INDEX
),
1478 changeset_graph
= ChangesetGraph(
1480 CVSItemToChangesetTable(
1481 artifact_manager
.get_temp_file(
1482 config
.CVS_ITEM_TO_CHANGESET_ALLBROKEN
1487 symbol_changeset_ids
= set()
1489 for changeset
in self
.get_source_changesets(changeset_db
):
1490 changeset_graph
.add_changeset(changeset
)
1491 if isinstance(changeset
, SymbolChangeset
):
1492 symbol_changeset_ids
.add(changeset
.id)
1494 # Ensure a monotonically-increasing timestamp series by keeping
1495 # track of the previous timestamp and ensuring that the following
1497 timestamper
= Timestamper()
1499 for (changeset
, time_range
) in changeset_graph
.consume_graph():
1500 timestamp
= timestamper
.get(
1501 time_range
.t_max
, changeset
.id in symbol_changeset_ids
1503 yield (changeset
, timestamp
)
1505 changeset_graph
.close()
1507 def run(self
, run_options
, stats_keeper
):
1508 Log().quiet("Generating CVSRevisions in commit order...")
1510 Ctx()._projects
= read_projects(
1511 artifact_manager
.get_temp_file(config
.PROJECTS
)
1513 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
1514 Ctx()._symbol
_db
= SymbolDatabase()
1515 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1516 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1517 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1520 sorted_changesets
= open(
1521 artifact_manager
.get_temp_file(config
.CHANGESETS_SORTED_DATAFILE
),
1524 for (changeset
, timestamp
) in self
.get_changesets():
1525 sorted_changesets
.write('%x %08x\n' % (changeset
.id, timestamp
,))
1527 sorted_changesets
.close()
1529 Ctx()._cvs
_items
_db
.close()
1530 Ctx()._symbol
_db
.close()
1531 Ctx()._cvs
_path
_db
.close()
1536 class CreateRevsPass(Pass
):
1537 """Generate the SVNCommit <-> CVSRevision mapping databases.
1539 SVNCommitCreator also calls SymbolingsLogger to register
1540 CVSRevisions that represent an opening or closing for a path on a
1541 branch or tag. See SymbolingsLogger for more details.
1543 This pass was formerly known as pass5."""
def register_artifacts(self):
    """Register the temporary files used when creating SVN commits.

    The SVN commit index and store, the CVS-revision-to-SVN-revnum file
    and the symbol openings/closings file are registered with
    _register_temp_file(); the project, path, sorted-item, symbol,
    ALLBROKEN and sorted-changeset files are registered with
    _register_temp_file_needed()."""
    # Files handed to _register_temp_file(), in registration order:
    registered = (
        config.SVN_COMMITS_INDEX_TABLE,
        config.SVN_COMMITS_STORE,
        config.CVS_REVS_TO_SVN_REVNUMS,
        config.SYMBOL_OPENINGS_CLOSINGS,
    )
    # Files handed to _register_temp_file_needed(), in registration order:
    needed = (
        config.PROJECTS,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.SYMBOL_DB,
        config.CHANGESETS_ALLBROKEN_STORE,
        config.CHANGESETS_ALLBROKEN_INDEX,
        config.CHANGESETS_SORTED_DATAFILE,
    )

    for artifact_name in registered:
        self._register_temp_file(artifact_name)
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
1559 def get_changesets(self
):
1560 """Generate (changeset,timestamp,) tuples in commit order."""
1562 changeset_db
= ChangesetDatabase(
1563 artifact_manager
.get_temp_file(config
.CHANGESETS_ALLBROKEN_STORE
),
1564 artifact_manager
.get_temp_file(config
.CHANGESETS_ALLBROKEN_INDEX
),
1568 artifact_manager
.get_temp_file(
1569 config
.CHANGESETS_SORTED_DATAFILE
)):
1570 [changeset_id
, timestamp
] = [int(s
, 16) for s
in line
.strip().split()]
1571 yield (changeset_db
[changeset_id
], timestamp
)
1573 changeset_db
.close()
1575 def get_svn_commits(self
, creator
):
1576 """Generate the SVNCommits, in order."""
1578 for (changeset
, timestamp
) in self
.get_changesets():
1579 for svn_commit
in creator
.process_changeset(changeset
, timestamp
):
1582 def log_svn_commit(self
, svn_commit
):
1583 """Output information about SVN_COMMIT."""
1586 'Creating Subversion r%d (%s)'
1587 % (svn_commit
.revnum
, svn_commit
.get_description(),)
1590 if isinstance(svn_commit
, SVNRevisionCommit
):
1591 for cvs_rev
in svn_commit
.cvs_revs
:
1592 Log().verbose(' %s %s' % (cvs_rev
.cvs_path
, cvs_rev
.rev
,))
1594 def run(self
, run_options
, stats_keeper
):
1595 Log().quiet("Mapping CVS revisions to Subversion commits...")
1597 Ctx()._projects
= read_projects(
1598 artifact_manager
.get_temp_file(config
.PROJECTS
)
1600 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
1601 Ctx()._symbol
_db
= SymbolDatabase()
1602 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1603 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1604 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1607 Ctx()._symbolings
_logger
= SymbolingsLogger()
1609 persistence_manager
= PersistenceManager(DB_OPEN_NEW
)
1611 creator
= SVNCommitCreator()
1612 for svn_commit
in self
.get_svn_commits(creator
):
1613 self
.log_svn_commit(svn_commit
)
1614 persistence_manager
.put_svn_commit(svn_commit
)
1616 stats_keeper
.set_svn_rev_count(creator
.revnum_generator
.get_last_id())
1619 persistence_manager
.close()
1620 Ctx()._symbolings
_logger
.close()
1621 Ctx()._cvs
_items
_db
.close()
1622 Ctx()._symbol
_db
.close()
1623 Ctx()._cvs
_path
_db
.close()
1628 class SortSymbolOpeningsClosingsPass(Pass
):
1629 """This pass was formerly known as pass6."""
def register_artifacts(self):
    """Register the sorted and unsorted symbol openings/closings files.

    The sorted file is registered with _register_temp_file() and the
    unsorted file with _register_temp_file_needed()."""
    sorted_file = config.SYMBOL_OPENINGS_CLOSINGS_SORTED
    self._register_temp_file(sorted_file)

    unsorted_file = config.SYMBOL_OPENINGS_CLOSINGS
    self._register_temp_file_needed(unsorted_file)
1635 def run(self
, run_options
, stats_keeper
):
1636 Log().quiet("Sorting symbolic name source revisions...")
1639 line
= line
.split(' ', 2)
1640 return (int(line
[0], 16), int(line
[1]), line
[2],)
1643 artifact_manager
.get_temp_file(config
.SYMBOL_OPENINGS_CLOSINGS
),
1644 artifact_manager
.get_temp_file(
1645 config
.SYMBOL_OPENINGS_CLOSINGS_SORTED
1648 tempdirs
=[Ctx().tmpdir
],
1653 class IndexSymbolsPass(Pass
):
1654 """This pass was formerly known as pass7."""
def register_artifacts(self):
    """Register the temporary files used when indexing symbols.

    The symbol offsets database is registered with
    _register_temp_file(); the project, symbol and sorted
    openings/closings files are registered with
    _register_temp_file_needed()."""
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)

    # Files handed to _register_temp_file_needed(), in registration order:
    needed = (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.SYMBOL_OPENINGS_CLOSINGS_SORTED,
    )
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
1662 def generate_offsets_for_symbolings(self
):
1663 """This function iterates through all the lines in
1664 SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
1665 SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
1666 where SYMBOLIC_NAME is first encountered. This will allow us to
1667 seek to the various offsets in the file and sequentially read only
1668 the openings and closings that we need."""
1673 artifact_manager
.get_temp_file(
1674 config
.SYMBOL_OPENINGS_CLOSINGS_SORTED
),
1682 id, svn_revnum
, ignored
= line
.split(" ", 2)
1685 Log().verbose(' ', Ctx()._symbol
_db
.get_symbol(id).name
)
1692 artifact_manager
.get_temp_file(config
.SYMBOL_OFFSETS_DB
), 'wb')
1693 cPickle
.dump(offsets
, offsets_db
, -1)
def run(self, run_options, stats_keeper):
    """Execute this pass: compute the offsets for all symbolic names.

    Loads the projects and the symbol database into Ctx(), delegates the
    real work to generate_offsets_for_symbolings(), then closes the
    symbol database.  RUN_OPTIONS and STATS_KEEPER are not used here."""
    Log().quiet("Determining offsets for all symbolic names...")
    # Populate the shared context that generate_offsets_for_symbolings()
    # reads (it looks symbols up through Ctx()._symbol_db):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    # Close the symbol database opened above now that the work is done:
    Ctx()._symbol_db.close()
    Log().quiet("Done.")
1707 class OutputPass(Pass
):
1708 """This pass was formerly known as pass8."""
def register_artifacts(self):
    """Register the temporary files used by the output pass.

    All files are registered with _register_temp_file_needed();
    afterwards the configured output option is asked to register its
    own artifacts against this pass."""
    # Files handed to _register_temp_file_needed(), in registration order:
    needed = (
        config.PROJECTS,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.SYMBOL_DB,
        config.METADATA_CLEAN_INDEX_TABLE,
        config.METADATA_CLEAN_STORE,
        config.SVN_COMMITS_INDEX_TABLE,
        config.SVN_COMMITS_STORE,
        config.CVS_REVS_TO_SVN_REVNUMS,
    )
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)

    # Let the output option declare whatever it needs as well:
    output_option = Ctx().output_option
    output_option.register_artifacts(self)
1723 def run(self
, run_options
, stats_keeper
):
1724 Ctx()._projects
= read_projects(
1725 artifact_manager
.get_temp_file(config
.PROJECTS
)
1727 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
1728 Ctx()._metadata
_db
= MetadataDatabase(
1729 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
1730 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
1733 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
1734 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
1735 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
1737 Ctx()._symbol
_db
= SymbolDatabase()
1738 Ctx()._persistence
_manager
= PersistenceManager(DB_OPEN_READ
)
1740 Ctx().output_option
.setup(stats_keeper
.svn_rev_count())
1743 svn_commit
= Ctx()._persistence
_manager
.get_svn_commit(svn_revnum
)
1745 svn_commit
.output(Ctx().output_option
)
1747 svn_commit
= Ctx()._persistence
_manager
.get_svn_commit(svn_revnum
)
1749 Ctx().output_option
.cleanup()
1750 Ctx()._persistence
_manager
.close()
1752 Ctx()._symbol
_db
.close()
1753 Ctx()._cvs
_items
_db
.close()
1754 Ctx()._metadata
_db
.close()
1755 Ctx()._cvs
_path
_db
.close()
1758 # The list of passes constituting a run of cvs2svn:
1761 CleanMetadataPass(),
1762 CollateSymbolsPass(),
1763 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1764 FilterSymbolsPass(),
1765 SortRevisionsPass(),
1767 InitializeChangesetsPass(),
1768 #CheckIndexedItemStoreDependenciesPass(
1769 # config.CVS_ITEMS_SORTED_STORE,
1770 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1771 BreakRevisionChangesetCyclesPass(),
1772 RevisionTopologicalSortPass(),
1773 BreakSymbolChangesetCyclesPass(),
1774 BreakAllChangesetCyclesPass(),
1775 TopologicalSortPass(),
1777 SortSymbolOpeningsClosingsPass(),