1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
24 from cvs2svn_lib
import config
25 from cvs2svn_lib
.context
import Ctx
26 from cvs2svn_lib
.common
import warning_prefix
27 from cvs2svn_lib
.common
import FatalException
28 from cvs2svn_lib
.common
import FatalError
29 from cvs2svn_lib
.common
import InternalError
30 from cvs2svn_lib
.common
import DB_OPEN_NEW
31 from cvs2svn_lib
.common
import DB_OPEN_READ
32 from cvs2svn_lib
.common
import DB_OPEN_WRITE
33 from cvs2svn_lib
.common
import Timestamper
34 from cvs2svn_lib
.sort
import sort_file
35 from cvs2svn_lib
.log
import Log
36 from cvs2svn_lib
.pass_manager
import Pass
37 from cvs2svn_lib
.serializer
import PrimedPickleSerializer
38 from cvs2svn_lib
.artifact_manager
import artifact_manager
39 from cvs2svn_lib
.cvs_path_database
import CVSPathDatabase
40 from cvs2svn_lib
.metadata_database
import MetadataDatabase
41 from cvs2svn_lib
.project
import read_projects
42 from cvs2svn_lib
.project
import write_projects
43 from cvs2svn_lib
.symbol
import LineOfDevelopment
44 from cvs2svn_lib
.symbol
import Trunk
45 from cvs2svn_lib
.symbol
import Symbol
46 from cvs2svn_lib
.symbol
import Branch
47 from cvs2svn_lib
.symbol
import Tag
48 from cvs2svn_lib
.symbol
import ExcludedSymbol
49 from cvs2svn_lib
.symbol_database
import SymbolDatabase
50 from cvs2svn_lib
.symbol_database
import create_symbol_database
51 from cvs2svn_lib
.symbol_statistics
import SymbolPlanError
52 from cvs2svn_lib
.symbol_statistics
import IndeterminateSymbolException
53 from cvs2svn_lib
.symbol_statistics
import SymbolStatistics
54 from cvs2svn_lib
.cvs_item
import CVSRevision
55 from cvs2svn_lib
.cvs_item
import CVSSymbol
56 from cvs2svn_lib
.cvs_item_database
import OldCVSItemStore
57 from cvs2svn_lib
.cvs_item_database
import IndexedCVSItemStore
58 from cvs2svn_lib
.cvs_item_database
import cvs_item_primer
59 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSRevisionDatabase
60 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSRevisionDatabase
61 from cvs2svn_lib
.cvs_item_database
import NewSortableCVSSymbolDatabase
62 from cvs2svn_lib
.cvs_item_database
import OldSortableCVSSymbolDatabase
63 from cvs2svn_lib
.key_generator
import KeyGenerator
64 from cvs2svn_lib
.changeset
import RevisionChangeset
65 from cvs2svn_lib
.changeset
import OrderedChangeset
66 from cvs2svn_lib
.changeset
import SymbolChangeset
67 from cvs2svn_lib
.changeset
import BranchChangeset
68 from cvs2svn_lib
.changeset
import create_symbol_changeset
69 from cvs2svn_lib
.changeset_graph
import ChangesetGraph
70 from cvs2svn_lib
.changeset_graph_link
import ChangesetGraphLink
71 from cvs2svn_lib
.changeset_database
import ChangesetDatabase
72 from cvs2svn_lib
.changeset_database
import CVSItemToChangesetTable
73 from cvs2svn_lib
.svn_commit
import SVNRevisionCommit
74 from cvs2svn_lib
.openings_closings
import SymbolingsLogger
75 from cvs2svn_lib
.svn_commit_creator
import SVNCommitCreator
76 from cvs2svn_lib
.persistence_manager
import PersistenceManager
77 from cvs2svn_lib
.collect_data
import CollectData
78 from cvs2svn_lib
.check_dependencies_pass \
79 import CheckItemStoreDependenciesPass
80 from cvs2svn_lib
.check_dependencies_pass \
81 import CheckIndexedItemStoreDependenciesPass
84 class CollectRevsPass(Pass
):
85 """This pass was formerly known as pass1."""
def register_artifacts(self):
    """Register the temporary files handled by this pass.

    Each of the config names listed below is handed, in order, to
    self._register_temp_file()."""

    # NOTE(review): presumably these are the files that this pass
    # creates -- confirm against Pass._register_temp_file().
    registered = (
        config.PROJECTS,
        config.SYMBOL_STATISTICS,
        config.METADATA_INDEX_TABLE,
        config.METADATA_STORE,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_STORE,
    )
    for artifact_name in registered:
        self._register_temp_file(artifact_name)
95 def run(self
, run_options
, stats_keeper
):
96 Log().quiet("Examining all CVS ',v' files...")
98 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_NEW
)
99 cd
= CollectData(stats_keeper
)
100 for project
in run_options
.projects
:
101 cd
.process_project(project
)
102 run_options
.projects
= None
104 fatal_errors
= cd
.close()
107 raise FatalException("Pass 1 complete.\n"
110 + "\n".join(fatal_errors
) + "\n"
111 + "Exited due to fatal error(s).")
113 Ctx()._cvs
_path
_db
.close()
114 write_projects(artifact_manager
.get_temp_file(config
.PROJECTS
))
118 class CleanMetadataPass(Pass
):
119 """Clean up CVS revision metadata and write it to a new database."""
def register_artifacts(self):
    """Register the temporary files handled by this pass.

    The cleaned metadata files go through self._register_temp_file();
    the uncleaned metadata files go through
    self._register_temp_file_needed()."""

    # NOTE(review): presumably "registered" means written by this pass
    # and "needed" means read by it -- confirm against the Pass class.
    registered = (
        config.METADATA_CLEAN_INDEX_TABLE,
        config.METADATA_CLEAN_STORE,
    )
    needed = (
        config.METADATA_INDEX_TABLE,
        config.METADATA_STORE,
    )
    for artifact_name in registered:
        self._register_temp_file(artifact_name)
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
127 def _get_clean_author(self
, author
):
128 """Return AUTHOR, converted appropriately to UTF8.
130 Raise a UnicodeException if it cannot be converted using the
131 configured cvs_author_decoder."""
134 return self
._authors
[author
]
139 clean_author
= Ctx().cvs_author_decoder(author
)
141 self
._authors
[author
] = author
142 raise UnicodeError('Problem decoding author \'%s\'' % (author
,))
145 clean_author
= clean_author
.encode('utf8')
147 self
._authors
[author
] = author
148 raise UnicodeError('Problem encoding author \'%s\'' % (author
,))
150 self
._authors
[author
] = clean_author
153 def _get_clean_log_msg(self
, log_msg
):
154 """Return LOG_MSG, converted appropriately to UTF8.
156 Raise a UnicodeException if it cannot be converted using the
157 configured cvs_log_decoder."""
160 clean_log_msg
= Ctx().cvs_log_decoder(log_msg
)
163 'Problem decoding log message:\n'
167 % ('-' * 75, log_msg
, '-' * 75,)
171 return clean_log_msg
.encode('utf8')
174 'Problem encoding log message:\n'
178 % ('-' * 75, log_msg
, '-' * 75,)
181 def _clean_metadata(self
, metadata
):
182 """Clean up METADATA by overwriting its members as necessary."""
185 metadata
.author
= self
._get
_clean
_author
(metadata
.author
)
186 except UnicodeError, e
:
187 Log().warn('%s: %s' % (warning_prefix
, e
,))
191 metadata
.log_msg
= self
._get
_clean
_log
_msg
(metadata
.log_msg
)
192 except UnicodeError, e
:
193 Log().warn('%s: %s' % (warning_prefix
, e
,))
196 def run(self
, run_options
, stats_keeper
):
197 Log().quiet("Converting metadata to UTF8...")
198 metadata_db
= MetadataDatabase(
199 artifact_manager
.get_temp_file(config
.METADATA_STORE
),
200 artifact_manager
.get_temp_file(config
.METADATA_INDEX_TABLE
),
203 metadata_clean_db
= MetadataDatabase(
204 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
205 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
209 self
.warnings
= False
211 # A map {author : clean_author} for those known (to avoid
212 # repeating warnings):
215 for id in metadata_db
.iterkeys():
216 metadata
= metadata_db
[id]
218 # Record the original author name because it might be needed for
219 # expanding CVS keywords:
220 metadata
.original_author
= metadata
.author
222 self
._clean
_metadata
(metadata
)
224 metadata_clean_db
[id] = metadata
228 'There were warnings converting author names and/or log messages\n'
229 'to Unicode (see messages above). Please restart this pass\n'
230 'with one or more \'--encoding\' parameters or with\n'
231 '\'--fallback-encoding\'.'
234 metadata_clean_db
.close()
239 class CollateSymbolsPass(Pass
):
240 """Divide symbols into branches, tags, and excludes."""
246 ExcludedSymbol
: 'exclude',
def register_artifacts(self):
    """Register the temporary files handled by this pass.

    config.SYMBOL_DB goes through self._register_temp_file(); the
    projects and symbol statistics files go through
    self._register_temp_file_needed()."""

    # NOTE(review): presumably "registered" means written by this pass
    # and "needed" means read by it -- confirm against the Pass class.
    self._register_temp_file(config.SYMBOL_DB)
    needed = (
        config.PROJECTS,
        config.SYMBOL_STATISTICS,
    )
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
255 def get_symbol(self
, run_options
, stats
):
256 """Use StrategyRules to decide what to do with a symbol.
258 STATS is an instance of symbol_statistics._Stats describing an
259 instance of Symbol or Trunk. To determine how the symbol is to be
260 converted, consult the StrategyRules in the project's
261 symbol_strategy_rules. Each rule is allowed a chance to change
262 the way the symbol will be converted. If the symbol is not a
263 Trunk or TypedSymbol after all rules have run, raise
264 IndeterminateSymbolException."""
267 rules
= run_options
.project_symbol_strategy_rules
[symbol
.project
.id]
269 symbol
= rule
.get_symbol(symbol
, stats
)
270 assert symbol
is not None
272 stats
.check_valid(symbol
)
276 def log_symbol_summary(self
, stats
, symbol
):
277 if not self
.symbol_info_file
:
280 if isinstance(symbol
, Trunk
):
282 preferred_parent_name
= '.'
284 name
= stats
.lod
.name
285 if symbol
.preferred_parent_id
is None:
286 preferred_parent_name
= '.'
288 preferred_parent
= self
.symbol_stats
[symbol
.preferred_parent_id
].lod
289 if isinstance(preferred_parent
, Trunk
):
290 preferred_parent_name
= '.trunk.'
292 preferred_parent_name
= preferred_parent
.name
294 if isinstance(symbol
, LineOfDevelopment
) and symbol
.base_path
:
295 symbol_path
= symbol
.base_path
299 self
.symbol_info_file
.write(
300 '%-5d %-30s %-10s %s %s\n' % (
301 stats
.lod
.project
.id,
303 self
.conversion_names
[symbol
.__class
__],
305 preferred_parent_name
,
308 self
.symbol_info_file
.write(' # %s\n' % (stats
,))
309 parent_counts
= stats
.possible_parents
.items()
311 self
.symbol_info_file
.write(' # Possible parents:\n')
312 parent_counts
.sort(lambda a
,b
: cmp((b
[1], a
[0]), (a
[1], b
[0])))
313 for (pp
, count
) in parent_counts
:
314 if isinstance(pp
, Trunk
):
315 self
.symbol_info_file
.write(
316 ' # .trunk. : %d\n' % (count
,)
319 self
.symbol_info_file
.write(
320 ' # %s : %d\n' % (pp
.name
, count
,)
323 def get_symbols(self
, run_options
):
324 """Return a map telling how to convert symbols.
326 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
327 indicating how each symbol should be converted. Trunk objects in
328 SYMBOL_STATS are passed through unchanged. One object is included
329 in the return value for each line of development described in
332 Raise FatalError if there was an error."""
337 if Ctx().symbol_info_filename
is not None:
338 self
.symbol_info_file
= open(Ctx().symbol_info_filename
, 'w')
339 self
.symbol_info_file
.write(
340 '# Columns: project_id symbol_name conversion symbol_path '
341 'preferred_parent_name\n'
344 self
.symbol_info_file
= None
346 # Initialize each symbol strategy rule a single time, even if it
347 # is used in more than one project. First define a map from
348 # object id to symbol strategy rule:
350 for rule_list
in run_options
.project_symbol_strategy_rules
:
351 for rule
in rule_list
:
352 rules
[id(rule
)] = rule
354 for rule
in rules
.itervalues():
355 rule
.start(self
.symbol_stats
)
359 for stats
in self
.symbol_stats
:
361 symbol
= self
.get_symbol(run_options
, stats
)
362 except IndeterminateSymbolException
, e
:
363 self
.log_symbol_summary(stats
, stats
.lod
)
364 mismatches
.append(e
.stats
)
365 except SymbolPlanError
, e
:
366 self
.log_symbol_summary(stats
, stats
.lod
)
369 self
.log_symbol_summary(stats
, symbol
)
370 retval
[stats
.lod
] = symbol
372 for rule
in rules
.itervalues():
375 if self
.symbol_info_file
:
376 self
.symbol_info_file
.close()
378 del self
.symbol_info_file
380 if errors
or mismatches
:
381 s
= ['Problems determining how symbols should be converted:\n']
383 s
.append('%s\n' % (e
,))
386 'It is not clear how the following symbols '
387 'should be converted.\n'
388 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
390 '--symbol-default to resolve the ambiguity.\n'
392 for stats
in mismatches
:
393 s
.append(' %s\n' % (stats
,))
394 raise FatalError(''.join(s
))
398 def run(self
, run_options
, stats_keeper
):
399 Ctx()._projects
= read_projects(
400 artifact_manager
.get_temp_file(config
.PROJECTS
)
402 self
.symbol_stats
= SymbolStatistics(
403 artifact_manager
.get_temp_file(config
.SYMBOL_STATISTICS
)
406 symbol_map
= self
.get_symbols(run_options
)
408 # Check the symbols for consistency and bail out if there were errors:
409 self
.symbol_stats
.check_consistency(symbol_map
)
411 # Check that the symbols all have SVN paths set and that the paths
413 Ctx().output_option
.check_symbols(symbol_map
)
415 for symbol
in symbol_map
.itervalues():
416 if isinstance(symbol
, ExcludedSymbol
):
417 self
.symbol_stats
.exclude_symbol(symbol
)
419 create_symbol_database(symbol_map
.values())
421 del self
.symbol_stats
426 class FilterSymbolsPass(Pass
):
427 """Delete any branches/tags that are to be excluded.
429 Also delete revisions on excluded branches, and delete other
430 references to the excluded symbols."""
def register_artifacts(self):
    """Register the temporary files handled by this pass.

    Three files go through self._register_temp_file() and four through
    self._register_temp_file_needed().  Afterwards the revision
    collector stored in Ctx() is asked to register its own artifacts
    against this pass."""

    # NOTE(review): presumably "registered" means written by this pass
    # and "needed" means read by it -- confirm against the Pass class.
    registered = (
        config.ITEM_SERIALIZER,
        config.CVS_REVS_DATAFILE,
        config.CVS_SYMBOLS_DATAFILE,
    )
    needed = (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_STORE,
    )
    for artifact_name in registered:
        self._register_temp_file(artifact_name)
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)

    # The collector registers last, after this pass's own files.
    collector = Ctx().revision_collector
    collector.register_artifacts(self)
442 def run(self
, run_options
, stats_keeper
):
443 Ctx()._projects
= read_projects(
444 artifact_manager
.get_temp_file(config
.PROJECTS
)
446 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
447 Ctx()._symbol
_db
= SymbolDatabase()
448 cvs_item_store
= OldCVSItemStore(
449 artifact_manager
.get_temp_file(config
.CVS_ITEMS_STORE
))
451 cvs_item_serializer
= PrimedPickleSerializer(cvs_item_primer
)
452 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'wb')
453 cPickle
.dump(cvs_item_serializer
, f
, -1)
456 rev_db
= NewSortableCVSRevisionDatabase(
457 artifact_manager
.get_temp_file(config
.CVS_REVS_DATAFILE
),
461 symbol_db
= NewSortableCVSSymbolDatabase(
462 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_DATAFILE
),
466 revision_collector
= Ctx().revision_collector
468 Log().quiet("Filtering out excluded symbols and summarizing items...")
470 stats_keeper
.reset_cvs_rev_info()
471 revision_collector
.start()
473 # Process the cvs items store one file at a time:
474 for cvs_file_items
in cvs_item_store
.iter_cvs_file_items():
475 Log().verbose(cvs_file_items
.cvs_file
.filename
)
476 cvs_file_items
.filter_excluded_symbols()
477 cvs_file_items
.mutate_symbols()
478 cvs_file_items
.adjust_parents()
479 cvs_file_items
.refine_symbols()
480 cvs_file_items
.record_opened_symbols()
481 cvs_file_items
.record_closed_symbols()
482 cvs_file_items
.check_link_consistency()
484 # Give the revision collector a chance to collect data about the
486 revision_collector
.process_file(cvs_file_items
)
488 # Store whatever is left to the new file and update statistics:
489 stats_keeper
.record_cvs_file(cvs_file_items
.cvs_file
)
490 for cvs_item
in cvs_file_items
.values():
491 stats_keeper
.record_cvs_item(cvs_item
)
493 if isinstance(cvs_item
, CVSRevision
):
495 elif isinstance(cvs_item
, CVSSymbol
):
496 symbol_db
.add(cvs_item
)
498 stats_keeper
.set_stats_reflect_exclude(True)
502 revision_collector
.finish()
503 cvs_item_store
.close()
504 Ctx()._symbol
_db
.close()
505 Ctx()._cvs
_path
_db
.close()
510 class SortRevisionsPass(Pass
):
511 """Sort the revisions file."""
def register_artifacts(self):
    """Register the temporary files handled by this pass.

    The sorted revisions datafile goes through
    self._register_temp_file(); the unsorted one goes through
    self._register_temp_file_needed()."""

    # NOTE(review): presumably sorted = output, unsorted = input of this
    # pass -- confirm against the Pass class.
    sorted_name = config.CVS_REVS_SORTED_DATAFILE
    unsorted_name = config.CVS_REVS_DATAFILE
    self._register_temp_file(sorted_name)
    self._register_temp_file_needed(unsorted_name)
517 def run(self
, run_options
, stats_keeper
):
518 Log().quiet("Sorting CVS revision summaries...")
520 artifact_manager
.get_temp_file(config
.CVS_REVS_DATAFILE
),
521 artifact_manager
.get_temp_file(
522 config
.CVS_REVS_SORTED_DATAFILE
524 tempdirs
=[Ctx().tmpdir
],
529 class SortSymbolsPass(Pass
):
530 """Sort the symbols file."""
def register_artifacts(self):
    """Register the temporary files handled by this pass.

    The sorted symbols datafile goes through
    self._register_temp_file(); the unsorted one goes through
    self._register_temp_file_needed()."""

    # NOTE(review): presumably sorted = output, unsorted = input of this
    # pass -- confirm against the Pass class.
    sorted_name = config.CVS_SYMBOLS_SORTED_DATAFILE
    unsorted_name = config.CVS_SYMBOLS_DATAFILE
    self._register_temp_file(sorted_name)
    self._register_temp_file_needed(unsorted_name)
536 def run(self
, run_options
, stats_keeper
):
537 Log().quiet("Sorting CVS symbol summaries...")
539 artifact_manager
.get_temp_file(config
.CVS_SYMBOLS_DATAFILE
),
540 artifact_manager
.get_temp_file(
541 config
.CVS_SYMBOLS_SORTED_DATAFILE
543 tempdirs
=[Ctx().tmpdir
],
548 class InitializeChangesetsPass(Pass
):
549 """Create preliminary CommitSets."""
def register_artifacts(self):
    """Register the temporary files handled by this pass.

    Five files go through self._register_temp_file() and six through
    self._register_temp_file_needed(), in the order listed below."""

    # NOTE(review): presumably "registered" means written by this pass
    # and "needed" means read by it -- confirm against the Pass class.
    registered = (
        config.CVS_ITEM_TO_CHANGESET,
        config.CHANGESETS_STORE,
        config.CHANGESETS_INDEX,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
    )
    needed = (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.ITEM_SERIALIZER,
        config.CVS_REVS_SORTED_DATAFILE,
        config.CVS_SYMBOLS_SORTED_DATAFILE,
    )
    for artifact_name in registered:
        self._register_temp_file(artifact_name)
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
565 def get_revision_changesets(self
):
566 """Generate revision changesets, one at a time.
568 Each time, yield a list of CVSRevisions that might potentially
569 constitute a changeset."""
571 # Create changesets for CVSRevisions:
572 old_metadata_id
= None
576 db
= OldSortableCVSRevisionDatabase(
577 artifact_manager
.get_temp_file(
578 config
.CVS_REVS_SORTED_DATAFILE
580 self
.cvs_item_serializer
,
584 if cvs_rev
.metadata_id
!= old_metadata_id \
585 or cvs_rev
.timestamp
> old_timestamp
+ config
.COMMIT_THRESHOLD
:
586 # Start a new changeset. First finish up the old changeset,
589 yield changeset_items
591 old_metadata_id
= cvs_rev
.metadata_id
592 changeset_items
.append(cvs_rev
)
593 old_timestamp
= cvs_rev
.timestamp
595 # Finish up the last changeset, if any:
597 yield changeset_items
599 def get_symbol_changesets(self
):
600 """Generate symbol changesets, one at a time.
602 Each time, yield a list of CVSSymbols that might potentially
603 constitute a changeset."""
608 db
= OldSortableCVSSymbolDatabase(
609 artifact_manager
.get_temp_file(
610 config
.CVS_SYMBOLS_SORTED_DATAFILE
612 self
.cvs_item_serializer
,
615 for cvs_symbol
in db
:
616 if cvs_symbol
.symbol
.id != old_symbol_id
:
617 # Start a new changeset. First finish up the old changeset,
620 yield changeset_items
622 old_symbol_id
= cvs_symbol
.symbol
.id
623 changeset_items
.append(cvs_symbol
)
625 # Finish up the last changeset, if any:
627 yield changeset_items
630 def compare_items(a
, b
):
632 cmp(a
.timestamp
, b
.timestamp
)
633 or cmp(a
.cvs_file
.cvs_path
, b
.cvs_file
.cvs_path
)
634 or cmp([int(x
) for x
in a
.rev
.split('.')],
635 [int(x
) for x
in b
.rev
.split('.')])
638 def break_internal_dependencies(self
, changeset_items
):
639 """Split up CHANGESET_ITEMS if necessary to break internal dependencies.
641 CHANGESET_ITEMS is a list of CVSRevisions that could possibly
642 belong in a single RevisionChangeset, but there might be internal
643 dependencies among the items. Return a list of lists, where each
644 sublist is a list of CVSRevisions and at least one internal
645 dependency has been eliminated. Iff CHANGESET_ITEMS does not have
646 to be split, then the return value will contain a single value,
647 namely the original value of CHANGESET_ITEMS. Split
648 CHANGESET_ITEMS at most once, even though the resulting changesets
649 might themselves have internal dependencies."""
651 # We only look for succ dependencies, since by doing so we
652 # automatically cover pred dependencies as well. First create a
653 # list of tuples (pred, succ) of id pairs for CVSItems that depend
656 changeset_cvs_item_ids
= set([cvs_rev
.id for cvs_rev
in changeset_items
])
657 for cvs_item
in changeset_items
:
658 for next_id
in cvs_item
.get_succ_ids():
659 if next_id
in changeset_cvs_item_ids
:
660 # Sanity check: a CVSItem should never depend on itself:
661 if next_id
== cvs_item
.id:
662 raise InternalError('Item depends on itself: %s' % (cvs_item
,))
664 dependencies
.append((cvs_item
.id, next_id
,))
667 # Sort the changeset_items in a defined order (chronological to the
668 # extent that the timestamps are correct and unique).
669 changeset_items
.sort(self
.compare_items
)
671 for (i
, changeset_item
) in enumerate(changeset_items
):
672 indexes
[changeset_item
.id] = i
673 # How many internal dependencies would be broken by breaking the
674 # Changeset after a particular index?
675 breaks
= [0] * len(changeset_items
)
676 for (pred
, succ
,) in dependencies
:
677 pred_index
= indexes
[pred
]
678 succ_index
= indexes
[succ
]
679 breaks
[min(pred_index
, succ_index
)] += 1
680 breaks
[max(pred_index
, succ_index
)] -= 1
684 for i
in range(1, len(breaks
)):
685 breaks
[i
] += breaks
[i
- 1]
686 for i
in range(0, len(breaks
) - 1):
687 if breaks
[i
] > best_count
:
689 best_count
= breaks
[i
]
690 best_time
= (changeset_items
[i
+ 1].timestamp
691 - changeset_items
[i
].timestamp
)
692 elif breaks
[i
] == best_count \
693 and (changeset_items
[i
+ 1].timestamp
694 - changeset_items
[i
].timestamp
) < best_time
:
696 best_count
= breaks
[i
]
697 best_time
= (changeset_items
[i
+ 1].timestamp
698 - changeset_items
[i
].timestamp
)
699 # Reuse the old changeset.id for the first of the split changesets.
700 return [changeset_items
[:best_i
+ 1], changeset_items
[best_i
+ 1:]]
702 return [changeset_items
]
704 def break_all_internal_dependencies(self
, changeset_items
):
705 """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.
707 CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
708 be part of a single changeset. Break this list into sublists,
709 where the CVSRevisions in each sublist are free of mutual
712 # This method is written non-recursively to avoid any possible
713 # problems with recursion depth.
715 changesets_to_split
= [changeset_items
]
716 while changesets_to_split
:
717 changesets
= self
.break_internal_dependencies(changesets_to_split
.pop())
718 if len(changesets
) == 1:
719 [changeset_items
] = changesets
720 yield changeset_items
722 # The changeset had to be split; see if either of the
723 # fragments have to be split:
725 changesets_to_split
.extend(changesets
)
727 def get_changesets(self
):
728 """Generate (Changeset, [CVSItem,...]) for all changesets.
730 The Changesets already have their internal dependencies broken.
731 The [CVSItem,...] list is the list of CVSItems in the
732 corresponding Changeset."""
734 for changeset_items
in self
.get_revision_changesets():
735 for split_changeset_items \
736 in self
.break_all_internal_dependencies(changeset_items
):
739 self
.changeset_key_generator
.gen_id(),
740 [cvs_rev
.id for cvs_rev
in split_changeset_items
]
742 split_changeset_items
,
745 for changeset_items
in self
.get_symbol_changesets():
747 create_symbol_changeset(
748 self
.changeset_key_generator
.gen_id(),
749 changeset_items
[0].symbol
,
750 [cvs_symbol
.id for cvs_symbol
in changeset_items
]
755 def run(self
, run_options
, stats_keeper
):
756 Log().quiet("Creating preliminary commit sets...")
758 Ctx()._projects
= read_projects(
759 artifact_manager
.get_temp_file(config
.PROJECTS
)
761 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
762 Ctx()._symbol
_db
= SymbolDatabase()
764 f
= open(artifact_manager
.get_temp_file(config
.ITEM_SERIALIZER
), 'rb')
765 self
.cvs_item_serializer
= cPickle
.load(f
)
768 changeset_db
= ChangesetDatabase(
769 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
770 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
773 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
774 artifact_manager
.get_temp_file(config
.CVS_ITEM_TO_CHANGESET
),
778 self
.sorted_cvs_items_db
= IndexedCVSItemStore(
779 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
780 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
783 self
.changeset_key_generator
= KeyGenerator()
785 for (changeset
, changeset_items
) in self
.get_changesets():
786 if Log().is_on(Log
.DEBUG
):
787 Log().debug(repr(changeset
))
788 changeset_db
.store(changeset
)
789 for cvs_item
in changeset_items
:
790 self
.sorted_cvs_items_db
.add(cvs_item
)
791 cvs_item_to_changeset_id
[cvs_item
.id] = changeset
.id
793 self
.sorted_cvs_items_db
.close()
794 cvs_item_to_changeset_id
.close()
796 Ctx()._symbol
_db
.close()
797 Ctx()._cvs
_path
_db
.close()
799 del self
.cvs_item_serializer
804 class ProcessedChangesetLogger
:
806 self
.processed_changeset_ids
= []
def log(self, changeset_id):
    """Append CHANGESET_ID to self.processed_changeset_ids.

    Nothing is recorded unless Log() reports that the DEBUG level is
    switched on."""

    if not Log().is_on(Log.DEBUG):
        return
    self.processed_changeset_ids.append(changeset_id)
813 if self
.processed_changeset_ids
:
815 'Consumed changeset ids %s'
816 % (', '.join(['%x' % id for id in self
.processed_changeset_ids
]),))
818 del self
.processed_changeset_ids
[:]
821 class BreakRevisionChangesetCyclesPass(Pass
):
822 """Break up any dependency cycles involving only RevisionChangesets."""
def register_artifacts(self):
    """Register the temporary files handled by this pass.

    The three *_REVBROKEN files go through self._register_temp_file();
    the remaining eight files go through
    self._register_temp_file_needed(), in the order listed below."""

    # NOTE(review): presumably "registered" means written by this pass
    # and "needed" means read by it -- confirm against the Pass class.
    registered = (
        config.CHANGESETS_REVBROKEN_STORE,
        config.CHANGESETS_REVBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
    )
    needed = (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_STORE,
        config.CHANGESETS_INDEX,
        config.CVS_ITEM_TO_CHANGESET,
    )
    for artifact_name in registered:
        self._register_temp_file(artifact_name)
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
837 def get_source_changesets(self
):
838 old_changeset_db
= ChangesetDatabase(
839 artifact_manager
.get_temp_file(config
.CHANGESETS_STORE
),
840 artifact_manager
.get_temp_file(config
.CHANGESETS_INDEX
),
843 changeset_ids
= old_changeset_db
.keys()
845 for changeset_id
in changeset_ids
:
846 yield old_changeset_db
[changeset_id
]
848 old_changeset_db
.close()
851 def break_cycle(self
, cycle
):
852 """Break up one or more changesets in CYCLE to help break the cycle.
854 CYCLE is a list of Changesets where
856 cycle[i] depends on cycle[i - 1]
858 Break up one or more changesets in CYCLE to make progress towards
859 breaking the cycle. Update self.changeset_graph accordingly.
861 It is not guaranteed that the cycle will be broken by one call to
862 this routine, but at least some progress must be made."""
864 self
.processed_changeset_logger
.flush()
867 for i
in range(len(cycle
)):
868 # It's OK if this index wraps to -1:
869 link
= ChangesetGraphLink(
870 cycle
[i
- 1], cycle
[i
], cycle
[i
+ 1 - len(cycle
)])
872 if best_i
is None or link
< best_link
:
876 if Log().is_on(Log
.DEBUG
):
878 'Breaking cycle %s by breaking node %x' % (
879 ' -> '.join(['%x' % node
.id for node
in (cycle
+ [cycle
[0]])]),
880 best_link
.changeset
.id,))
882 new_changesets
= best_link
.break_changeset(self
.changeset_key_generator
)
884 self
.changeset_graph
.delete_changeset(best_link
.changeset
)
886 for changeset
in new_changesets
:
887 self
.changeset_graph
.add_new_changeset(changeset
)
889 def run(self
, run_options
, stats_keeper
):
890 Log().quiet("Breaking revision changeset dependency cycles...")
892 Ctx()._projects
= read_projects(
893 artifact_manager
.get_temp_file(config
.PROJECTS
)
895 Ctx()._cvs
_path
_db
= CVSPathDatabase(DB_OPEN_READ
)
896 Ctx()._symbol
_db
= SymbolDatabase()
897 Ctx()._cvs
_items
_db
= IndexedCVSItemStore(
898 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_STORE
),
899 artifact_manager
.get_temp_file(config
.CVS_ITEMS_SORTED_INDEX_TABLE
),
903 artifact_manager
.get_temp_file(
904 config
.CVS_ITEM_TO_CHANGESET
),
905 artifact_manager
.get_temp_file(
906 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
))
907 cvs_item_to_changeset_id
= CVSItemToChangesetTable(
908 artifact_manager
.get_temp_file(
909 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
),
912 changeset_db
= ChangesetDatabase(
913 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
914 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
917 self
.changeset_graph
= ChangesetGraph(
918 changeset_db
, cvs_item_to_changeset_id
922 for changeset
in self
.get_source_changesets():
923 changeset_db
.store(changeset
)
924 if isinstance(changeset
, RevisionChangeset
):
925 self
.changeset_graph
.add_changeset(changeset
)
926 max_changeset_id
= max(max_changeset_id
, changeset
.id)
928 self
.changeset_key_generator
= KeyGenerator(max_changeset_id
+ 1)
930 self
.processed_changeset_logger
= ProcessedChangesetLogger()
932 # Consume the graph, breaking cycles using self.break_cycle():
933 for (changeset
, time_range
) in self
.changeset_graph
.consume_graph(
934 cycle_breaker
=self
.break_cycle
936 self
.processed_changeset_logger
.log(changeset
.id)
938 self
.processed_changeset_logger
.flush()
939 del self
.processed_changeset_logger
941 self
.changeset_graph
.close()
942 self
.changeset_graph
= None
943 Ctx()._cvs
_items
_db
.close()
944 Ctx()._symbol
_db
.close()
945 Ctx()._cvs
_path
_db
.close()
950 class RevisionTopologicalSortPass(Pass
):
951 """Sort RevisionChangesets into commit order.
953 Also convert them to OrderedChangesets, without changing their ids."""
def register_artifacts(self):
    """Register the temporary files handled by this pass.

    The two CHANGESETS_REVSORTED files go through
    self._register_temp_file(); the remaining eight files go through
    self._register_temp_file_needed(), in the order listed below."""

    # NOTE(review): presumably "registered" means written by this pass
    # and "needed" means read by it -- confirm against the Pass class.
    registered = (
        config.CHANGESETS_REVSORTED_STORE,
        config.CHANGESETS_REVSORTED_INDEX,
    )
    needed = (
        config.PROJECTS,
        config.SYMBOL_DB,
        config.CVS_PATHS_DB,
        config.CVS_ITEMS_SORTED_STORE,
        config.CVS_ITEMS_SORTED_INDEX_TABLE,
        config.CHANGESETS_REVBROKEN_STORE,
        config.CHANGESETS_REVBROKEN_INDEX,
        config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
    )
    for artifact_name in registered:
        self._register_temp_file(artifact_name)
    for artifact_name in needed:
        self._register_temp_file_needed(artifact_name)
def get_source_changesets(self, changeset_db):
    """Generate the entries of CHANGESET_DB, one at a time.

    The ids are fetched with a single CHANGESET_DB.keys() call before
    the first entry is yielded; each id is then looked up in
    CHANGESET_DB and the result is yielded."""

    # keys() is evaluated exactly once, when the loop starts.
    for key in changeset_db.keys():
        yield changeset_db[key]
973 def get_changesets(self
):
974 changeset_db
= ChangesetDatabase(
975 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_STORE
),
976 artifact_manager
.get_temp_file(config
.CHANGESETS_REVBROKEN_INDEX
),
980 changeset_graph
= ChangesetGraph(
982 CVSItemToChangesetTable(
983 artifact_manager
.get_temp_file(
984 config
.CVS_ITEM_TO_CHANGESET_REVBROKEN
990 for changeset
in self
.get_source_changesets(changeset_db
):
991 if isinstance(changeset
, RevisionChangeset
):
992 changeset_graph
.add_changeset(changeset
)
999 changeset_ids
.append(None)
1001 for (changeset
, time_range
) in changeset_graph
.consume_graph():
1002 changeset_ids
.append(changeset
.id)
1005 changeset_ids
.append(None)
1007 for i
in range(1, len(changeset_ids
) - 1):
1008 changeset
= changeset_db
[changeset_ids
[i
]]
1009 yield OrderedChangeset(
1010 changeset
.id, changeset
.cvs_item_ids
, i
- 1,
1011 changeset_ids
[i
- 1], changeset_ids
[i
+ 1])
1013 changeset_graph
.close()
def run(self, run_options, stats_keeper):
    """Store every changeset generated by get_changesets().

    The databases that are opened on Ctx() at the start are closed
    again once all changesets have been stored in the REVSORTED
    changeset database."""

    Log().quiet("Generating CVSRevisions in commit order...")

    temp_file = artifact_manager.get_temp_file

    Ctx()._projects = read_projects(temp_file(config.PROJECTS))
    Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        temp_file(config.CVS_ITEMS_SORTED_STORE),
        temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ,
        )

    output_db = ChangesetDatabase(
        temp_file(config.CHANGESETS_REVSORTED_STORE),
        temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW,
        )

    for changeset in self.get_changesets():
        output_db.store(changeset)

    output_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_path_db.close()

    Log().quiet("Done")
class BreakSymbolChangesetCyclesPass(Pass):
    """Break up any dependency cycles involving only SymbolChangesets."""

    def register_artifacts(self):
        """Register the temporary files used by this pass."""

        for registered in (
                config.CHANGESETS_SYMBROKEN_STORE,
                config.CHANGESETS_SYMBROKEN_INDEX,
                config.CVS_ITEM_TO_CHANGESET_SYMBROKEN,
                ):
            self._register_temp_file(registered)

        for needed in (
                config.PROJECTS,
                config.SYMBOL_DB,
                config.CVS_PATHS_DB,
                config.CVS_ITEMS_SORTED_STORE,
                config.CVS_ITEMS_SORTED_INDEX_TABLE,
                config.CHANGESETS_REVSORTED_STORE,
                config.CHANGESETS_REVSORTED_INDEX,
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN,
                ):
            self._register_temp_file_needed(needed)

    def get_source_changesets(self):
        """Generate every changeset in the REVSORTED changeset database.

        The database is closed once the last changeset has been
        yielded."""

        temp_file = artifact_manager.get_temp_file
        source_db = ChangesetDatabase(
            temp_file(config.CHANGESETS_REVSORTED_STORE),
            temp_file(config.CHANGESETS_REVSORTED_INDEX),
            DB_OPEN_READ,
            )

        for key in source_db.keys():
            yield source_db[key]

        source_db.close()

    def break_cycle(self, cycle):
        """Break up one or more changesets in CYCLE to help break the cycle.

        CYCLE is a list of Changesets where

          cycle[i] depends on cycle[i - 1]

        Break up one or more changesets in CYCLE to make progress towards
        breaking the cycle.  Update self.changeset_graph accordingly.

        It is not guaranteed that the cycle will be broken by one call to
        this routine, but at least some progress must be made."""

        self.processed_changeset_logger.flush()

        # Find the link that compares smallest among all of the
        # (predecessor, changeset, successor) triples around the cycle.
        # Both neighbor indexes are allowed to wrap around the list:
        size = len(cycle)
        best_link = None
        for position in range(size):
            candidate = ChangesetGraphLink(
                cycle[position - 1],
                cycle[position],
                cycle[position + 1 - size],
                )
            if best_link is None or candidate < best_link:
                best_link = candidate

        if Log().is_on(Log.DEBUG):
            closed_cycle = cycle + [cycle[0]]
            Log().debug(
                'Breaking cycle %s by breaking node %x' % (
                    ' -> '.join(['%x' % node.id for node in closed_cycle]),
                    best_link.changeset.id,
                    )
                )

        replacements = best_link.break_changeset(self.changeset_key_generator)

        self.changeset_graph.delete_changeset(best_link.changeset)

        for replacement in replacements:
            self.changeset_graph.add_new_changeset(replacement)

    def run(self, run_options, stats_keeper):
        """Copy all changesets to the SYMBROKEN database, breaking cycles.

        Every source changeset is stored in the new database, but only
        SymbolChangesets are added to the graph that is then consumed
        with self.break_cycle as the cycle breaker."""

        Log().quiet("Breaking symbol changeset dependency cycles...")

        temp_file = artifact_manager.get_temp_file

        Ctx()._projects = read_projects(temp_file(config.PROJECTS))
        Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
        Ctx()._symbol_db = SymbolDatabase()
        Ctx()._cvs_items_db = IndexedCVSItemStore(
            temp_file(config.CVS_ITEMS_SORTED_STORE),
            temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
            DB_OPEN_READ,
            )

        # Start from a copy of the previous pass's item->changeset table
        # and open the copy for modification:
        shutil.copyfile(
            temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
            temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
            )
        cvs_item_to_changeset_id = CVSItemToChangesetTable(
            temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
            DB_OPEN_WRITE,
            )

        changeset_db = ChangesetDatabase(
            temp_file(config.CHANGESETS_SYMBROKEN_STORE),
            temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
            DB_OPEN_NEW,
            )

        self.changeset_graph = ChangesetGraph(
            changeset_db, cvs_item_to_changeset_id
            )

        highest_id = 0
        for changeset in self.get_source_changesets():
            changeset_db.store(changeset)
            if isinstance(changeset, SymbolChangeset):
                self.changeset_graph.add_changeset(changeset)
            highest_id = max(highest_id, changeset.id)

        # Ids for changesets created by break_cycle() start above the
        # largest id seen among the source changesets:
        self.changeset_key_generator = KeyGenerator(highest_id + 1)

        self.processed_changeset_logger = ProcessedChangesetLogger()

        # Consume the graph, breaking cycles using self.break_cycle():
        consumed = self.changeset_graph.consume_graph(
            cycle_breaker=self.break_cycle
            )
        for (changeset, time_range) in consumed:
            self.processed_changeset_logger.log(changeset.id)

        self.processed_changeset_logger.flush()
        del self.processed_changeset_logger

        self.changeset_graph.close()
        self.changeset_graph = None
        Ctx()._cvs_items_db.close()
        Ctx()._symbol_db.close()
        Ctx()._cvs_path_db.close()

        Log().quiet("Done")
class BreakAllChangesetCyclesPass(Pass):
    """Break up any dependency cycles that are closed by SymbolChangesets."""

    def register_artifacts(self):
        """Register the temporary files used by this pass."""

        self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
        self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
        self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
        self._register_temp_file_needed(config.PROJECTS)
        self._register_temp_file_needed(config.SYMBOL_DB)
        self._register_temp_file_needed(config.CVS_PATHS_DB)
        self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
        self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
        self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
        self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
        self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

    def get_source_changesets(self):
        """Generate every changeset in the SYMBROKEN changeset database.

        The database is closed once the last changeset has been
        yielded."""

        old_changeset_db = ChangesetDatabase(
            artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
            artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
            DB_OPEN_READ)

        changeset_ids = old_changeset_db.keys()

        for changeset_id in changeset_ids:
            yield old_changeset_db[changeset_id]

        old_changeset_db.close()

    def _split_retrograde_changeset(self, changeset):
        """CHANGESET is retrograde.  Split it into non-retrograde changesets.

        CHANGESET is removed from self.changeset_graph and replaced by
        an "early" and a "late" changeset.  The late changeset is then
        checked again and split further if it is still retrograde."""

        Log().debug('Breaking retrograde changeset %x' % (changeset.id,))

        self.changeset_graph.delete_changeset(changeset)

        # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
        ordinal_limits = {}
        for cvs_branch in changeset.iter_cvs_items():
            max_pred_ordinal = 0
            min_succ_ordinal = sys.maxint

            # Predecessors without an ordinal count as 0:
            for pred_id in cvs_branch.get_pred_ids():
                pred_ordinal = self.ordinals.get(
                    self.cvs_item_to_changeset_id[pred_id], 0)
                max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

            # Successors without an ordinal count as sys.maxint:
            for succ_id in cvs_branch.get_succ_ids():
                succ_ordinal = self.ordinals.get(
                    self.cvs_item_to_changeset_id[succ_id], sys.maxint)
                min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

            assert max_pred_ordinal < min_succ_ordinal
            ordinal_limits[cvs_branch.id] = (
                max_pred_ordinal, min_succ_ordinal,
                )

        # Find the earliest successor ordinal:
        min_min_succ_ordinal = sys.maxint
        for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
            min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

        # Items whose predecessors reach the earliest successor ordinal
        # go into the late changeset; all others go into the early one:
        early_item_ids = []
        late_item_ids = []
        for (item_id, limits) in ordinal_limits.items():
            (max_pred_ordinal, min_succ_ordinal) = limits
            if max_pred_ordinal >= min_min_succ_ordinal:
                late_item_ids.append(item_id)
            else:
                early_item_ids.append(item_id)

        assert early_item_ids
        assert late_item_ids

        early_changeset = changeset.create_split_changeset(
            self.changeset_key_generator.gen_id(), early_item_ids)
        late_changeset = changeset.create_split_changeset(
            self.changeset_key_generator.gen_id(), late_item_ids)

        self.changeset_graph.add_new_changeset(early_changeset)
        self.changeset_graph.add_new_changeset(late_changeset)

        early_split = self._split_if_retrograde(early_changeset.id)

        # Because of the way we constructed it, the early changeset should
        # not have to be split:
        assert not early_split

        self._split_if_retrograde(late_changeset.id)

    def _split_if_retrograde(self, changeset_id):
        """Split the changeset with id CHANGESET_ID if it is retrograde.

        The changeset is treated as retrograde if the largest ordinal
        among its predecessors is greater than or equal to the smallest
        ordinal among its successors.  Only neighbors that appear in
        self.ordinals are considered.

        Return True iff the changeset was split."""

        node = self.changeset_graph[changeset_id]
        pred_ordinals = [
            self.ordinals[pred_id]
            for pred_id in node.pred_ids
            if pred_id in self.ordinals
            ]
        pred_ordinals.sort()
        succ_ordinals = [
            self.ordinals[succ_id]
            for succ_id in node.succ_ids
            if succ_id in self.ordinals
            ]
        succ_ordinals.sort()
        if pred_ordinals and succ_ordinals \
               and pred_ordinals[-1] >= succ_ordinals[0]:
            self._split_retrograde_changeset(self.changeset_db[node.id])
            return True
        else:
            return False

    def break_segment(self, segment):
        """Break a changeset in SEGMENT[1:-1].

        The range SEGMENT[1:-1] is not empty, and all of the changesets in
        that range are SymbolChangesets."""

        # Choose the link that compares smallest among the interior
        # nodes of the segment:
        best_link = None
        for i in range(1, len(segment) - 1):
            link = ChangesetGraphLink(
                segment[i - 1], segment[i], segment[i + 1]
                )

            if best_link is None or link < best_link:
                best_link = link

        if Log().is_on(Log.DEBUG):
            Log().debug(
                'Breaking segment %s by breaking node %x' % (
                    ' -> '.join(['%x' % node.id for node in segment]),
                    best_link.changeset.id,))

        new_changesets = best_link.break_changeset(self.changeset_key_generator)

        self.changeset_graph.delete_changeset(best_link.changeset)

        for changeset in new_changesets:
            self.changeset_graph.add_new_changeset(changeset)

    def break_cycle(self, cycle):
        """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

        CYCLE is a list of SymbolChangesets where

          cycle[i] depends on cycle[i - 1]

        Break up one or more changesets in CYCLE to make progress
        towards breaking the cycle.  Update self.changeset_graph
        accordingly.

        It is not guaranteed that the cycle will be broken by one call to
        this routine, but at least some progress must be made."""

        if Log().is_on(Log.DEBUG):
            Log().debug(
                'Breaking cycle %s' % (
                    ' -> '.join(['%x' % changeset.id
                                 for changeset in cycle + [cycle[0]]]),))

        # Unwrap the cycle into a segment then break the segment:
        self.break_segment([cycle[-1]] + cycle + [cycle[0]])

    def run(self, run_options, stats_keeper):
        """Copy all changesets to the ALLBROKEN database, breaking cycles.

        Individually retrograde BranchChangesets are split first.  Then
        the graph is consumed; whenever no node without predecessors is
        left, a path or cycle starting at the next unprocessed ordered
        changeset is broken."""

        Log().quiet("Breaking CVSSymbol dependency loops...")

        Ctx()._projects = read_projects(
            artifact_manager.get_temp_file(config.PROJECTS)
            )
        Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
        Ctx()._symbol_db = SymbolDatabase()
        Ctx()._cvs_items_db = IndexedCVSItemStore(
            artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
            artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
            DB_OPEN_READ)

        # Start from a copy of the previous pass's item->changeset table
        # and open the copy for modification:
        shutil.copyfile(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
        self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
            DB_OPEN_WRITE)

        self.changeset_db = ChangesetDatabase(
            artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
            artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
            DB_OPEN_NEW)

        self.changeset_graph = ChangesetGraph(
            self.changeset_db, self.cvs_item_to_changeset_id
            )

        # A map {changeset_id : ordinal} for OrderedChangesets:
        self.ordinals = {}
        # A map {ordinal : changeset_id}:
        ordered_changeset_map = {}
        # A list of all BranchChangeset ids:
        branch_changeset_ids = []
        max_changeset_id = 0
        for changeset in self.get_source_changesets():
            self.changeset_db.store(changeset)
            self.changeset_graph.add_changeset(changeset)
            if isinstance(changeset, OrderedChangeset):
                ordered_changeset_map[changeset.ordinal] = changeset.id
                self.ordinals[changeset.id] = changeset.ordinal
            elif isinstance(changeset, BranchChangeset):
                branch_changeset_ids.append(changeset.id)
            max_changeset_id = max(max_changeset_id, changeset.id)

        # An array of ordered_changeset ids, indexed by ordinal:
        ordered_changesets = [
            ordered_changeset_map[ordinal]
            for ordinal in range(len(ordered_changeset_map))
            ]

        ordered_changeset_ids = set(ordered_changeset_map.values())
        del ordered_changeset_map

        self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

        # First we scan through all BranchChangesets looking for
        # changesets that are individually "retrograde" and splitting
        # those up:
        for changeset_id in branch_changeset_ids:
            self._split_if_retrograde(changeset_id)

        del self.ordinals

        next_ordered_changeset = 0

        self.processed_changeset_logger = ProcessedChangesetLogger()

        while self.changeset_graph:
            # Consume any nodes that don't have predecessors:
            for (changeset, time_range) \
                    in self.changeset_graph.consume_nopred_nodes():
                self.processed_changeset_logger.log(changeset.id)
                if changeset.id in ordered_changeset_ids:
                    next_ordered_changeset += 1
                    ordered_changeset_ids.remove(changeset.id)

            self.processed_changeset_logger.flush()

            if not self.changeset_graph:
                break

            # Now work on the next ordered changeset that has not yet been
            # processed.  BreakSymbolChangesetCyclesPass has broken any
            # cycles involving only SymbolChangesets, so the presence of a
            # cycle implies that there is at least one ordered changeset
            # left in the graph:
            assert next_ordered_changeset < len(ordered_changesets)

            start_id = ordered_changesets[next_ordered_changeset]
            path = self.changeset_graph.search_for_path(
                start_id, ordered_changeset_ids
                )
            if path:
                if Log().is_on(Log.DEBUG):
                    Log().debug(
                        'Breaking path from %s to %s' % (path[0], path[-1],)
                        )
                self.break_segment(path)
            else:
                # There were no ordered changesets among the reachable
                # predecessors, so do generic cycle-breaking:
                if Log().is_on(Log.DEBUG):
                    Log().debug(
                        'Breaking generic cycle found from %s'
                        % (self.changeset_db[start_id],)
                        )
                self.break_cycle(self.changeset_graph.find_cycle(start_id))

        del self.processed_changeset_logger
        self.changeset_graph.close()
        self.changeset_graph = None
        self.cvs_item_to_changeset_id = None
        self.changeset_db = None

        # Release the databases that were opened on Ctx() above, as the
        # other passes in this module do at the end of run():
        Ctx()._cvs_items_db.close()
        Ctx()._symbol_db.close()
        Ctx()._cvs_path_db.close()

        Log().quiet("Done")
class TopologicalSortPass(Pass):
    """Sort changesets into commit order."""

    def register_artifacts(self):
        """Register the temporary files used by this pass."""

        self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
        self._register_temp_file_needed(config.PROJECTS)
        self._register_temp_file_needed(config.SYMBOL_DB)
        self._register_temp_file_needed(config.CVS_PATHS_DB)
        self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
        self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
        self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
        self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
        self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

    def get_source_changesets(self, changeset_db):
        """Generate the changeset stored under each key of CHANGESET_DB."""

        for changeset_id in changeset_db.keys():
            yield changeset_db[changeset_id]

    def get_changesets(self):
        """Generate (changeset, timestamp) pairs in commit order."""

        changeset_db = ChangesetDatabase(
            artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
            artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
            DB_OPEN_READ)

        changeset_graph = ChangesetGraph(
            changeset_db,
            CVSItemToChangesetTable(
                artifact_manager.get_temp_file(
                    config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                    ),
                DB_OPEN_READ,
                ),
            )
        symbol_changeset_ids = set()

        for changeset in self.get_source_changesets(changeset_db):
            changeset_graph.add_changeset(changeset)
            if isinstance(changeset, SymbolChangeset):
                symbol_changeset_ids.add(changeset.id)

        # Ensure a monotonically-increasing timestamp series by keeping
        # track of the previous timestamp and ensuring that the following
        # one is larger.
        timestamper = Timestamper()

        for (changeset, time_range) in changeset_graph.consume_graph():
            # The second argument tells the timestamper whether the
            # changeset is a SymbolChangeset:
            timestamp = timestamper.get(
                time_range.t_max, changeset.id in symbol_changeset_ids
                )
            yield (changeset, timestamp)

        changeset_graph.close()

    def run(self, run_options, stats_keeper):
        """Write one "<changeset id> <timestamp>" line per changeset.

        Both numbers are written in hexadecimal to the
        CHANGESETS_SORTED_DATAFILE temporary file, in the order in which
        get_changesets() generates them."""

        Log().quiet("Generating CVSRevisions in commit order...")

        Ctx()._projects = read_projects(
            artifact_manager.get_temp_file(config.PROJECTS)
            )
        Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
        Ctx()._symbol_db = SymbolDatabase()
        Ctx()._cvs_items_db = IndexedCVSItemStore(
            artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
            artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
            DB_OPEN_READ)

        sorted_changesets = open(
            artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
            'w')

        # Make sure that the output file is closed even if generating or
        # writing a changeset fails part-way through:
        try:
            for (changeset, timestamp) in self.get_changesets():
                sorted_changesets.write(
                    '%x %08x\n' % (changeset.id, timestamp,)
                    )
        finally:
            sorted_changesets.close()

        Ctx()._cvs_items_db.close()
        Ctx()._symbol_db.close()
        Ctx()._cvs_path_db.close()

        Log().quiet("Done")
class CreateRevsPass(Pass):
    """Generate the SVNCommit <-> CVSRevision mapping databases.

    SVNCommitCreator also calls SymbolingsLogger to register
    CVSRevisions that represent an opening or closing for a path on a
    branch or tag.  See SymbolingsLogger for more details.

    This pass was formerly known as pass5."""

    def register_artifacts(self):
        """Register the temporary files used by this pass."""

        for registered in (
                config.SVN_COMMITS_INDEX_TABLE,
                config.SVN_COMMITS_STORE,
                config.CVS_REVS_TO_SVN_REVNUMS,
                config.SYMBOL_OPENINGS_CLOSINGS,
                ):
            self._register_temp_file(registered)

        for needed in (
                config.PROJECTS,
                config.CVS_PATHS_DB,
                config.CVS_ITEMS_SORTED_STORE,
                config.CVS_ITEMS_SORTED_INDEX_TABLE,
                config.SYMBOL_DB,
                config.CHANGESETS_ALLBROKEN_STORE,
                config.CHANGESETS_ALLBROKEN_INDEX,
                config.CHANGESETS_SORTED_DATAFILE,
                ):
            self._register_temp_file_needed(needed)

    def get_changesets(self):
        """Generate (changeset,timestamp,) tuples in commit order.

        Each line of CHANGESETS_SORTED_DATAFILE holds a changeset id and
        a timestamp, both in hexadecimal.  The changeset itself is
        looked up in the ALLBROKEN changeset database."""

        temp_file = artifact_manager.get_temp_file

        changeset_db = ChangesetDatabase(
            temp_file(config.CHANGESETS_ALLBROKEN_STORE),
            temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
            DB_OPEN_READ,
            )

        for line in open(temp_file(config.CHANGESETS_SORTED_DATAFILE)):
            fields = line.strip().split()
            [changeset_id, timestamp] = [int(field, 16) for field in fields]
            yield (changeset_db[changeset_id], timestamp)

        changeset_db.close()

    def get_svn_commits(self, creator):
        """Generate the SVNCommits, in order."""

        for (changeset, timestamp) in self.get_changesets():
            svn_commits = creator.process_changeset(changeset, timestamp)
            for svn_commit in svn_commits:
                yield svn_commit

    def log_svn_commit(self, svn_commit):
        """Output information about SVN_COMMIT."""

        description = (svn_commit.revnum, svn_commit.get_description(),)
        Log().normal('Creating Subversion r%d (%s)' % description)

        if not isinstance(svn_commit, SVNRevisionCommit):
            return

        for cvs_rev in svn_commit.cvs_revs:
            Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

    def run(self, run_options, stats_keeper):
        """Create, log and store the SVNCommit for every changeset.

        The last revision number that was generated is recorded in
        STATS_KEEPER."""

        Log().quiet("Mapping CVS revisions to Subversion commits...")

        temp_file = artifact_manager.get_temp_file

        Ctx()._projects = read_projects(temp_file(config.PROJECTS))
        Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
        Ctx()._symbol_db = SymbolDatabase()
        Ctx()._cvs_items_db = IndexedCVSItemStore(
            temp_file(config.CVS_ITEMS_SORTED_STORE),
            temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
            DB_OPEN_READ,
            )

        Ctx()._symbolings_logger = SymbolingsLogger()

        persistence_manager = PersistenceManager(DB_OPEN_NEW)

        creator = SVNCommitCreator()
        for svn_commit in self.get_svn_commits(creator):
            self.log_svn_commit(svn_commit)
            persistence_manager.put_svn_commit(svn_commit)

        last_revnum = creator.revnum_generator.get_last_id()
        stats_keeper.set_svn_rev_count(last_revnum)
        del creator

        persistence_manager.close()
        Ctx()._symbolings_logger.close()
        Ctx()._cvs_items_db.close()
        Ctx()._symbol_db.close()
        Ctx()._cvs_path_db.close()

        Log().quiet("Done")
class SortSymbolOpeningsClosingsPass(Pass):
    """This pass was formerly known as pass6."""

    def register_artifacts(self):
        """Register the temporary files used by this pass."""

        self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
        self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

    def run(self, run_options, stats_keeper):
        """Sort SYMBOL_OPENINGS_CLOSINGS into SYMBOL_OPENINGS_CLOSINGS_SORTED."""

        Log().quiet("Sorting symbolic name source revisions...")

        def sort_key(line):
            # Each line starts with two space-separated numbers (the
            # first one hexadecimal, the second one decimal); the rest
            # of the line is compared as text:
            (first, second, rest) = line.split(' ', 2)
            return (int(first, 16), int(second), rest,)

        unsorted_filename = artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS
            )
        sorted_filename = artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED
            )

        sort_file(
            unsorted_filename,
            sorted_filename,
            key=sort_key,
            tempdirs=[Ctx().tmpdir],
            )
        Log().quiet("Done")
class IndexSymbolsPass(Pass):
    """This pass was formerly known as pass7."""

    def register_artifacts(self):
        """Register the temporary files used by this pass."""

        self._register_temp_file(config.SYMBOL_OFFSETS_DB)
        for needed in (
                config.PROJECTS,
                config.SYMBOL_DB,
                config.SYMBOL_OPENINGS_CLOSINGS_SORTED,
                ):
            self._register_temp_file_needed(needed)

    def generate_offsets_for_symbolings(self):
        """This function iterates through all the lines in
        SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
        SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
        where SYMBOLIC_NAME is first encountered.  This will allow us to
        seek to the various offsets in the file and sequentially read only
        the openings and closings that we need."""

        # A map {symbol id : offset of the first line with that id}:
        offsets = {}

        f = open(
            artifact_manager.get_temp_file(
                config.SYMBOL_OPENINGS_CLOSINGS_SORTED
                ),
            'r')
        previous_id = None
        # The offset has to be read before the line is, so readline()
        # is used instead of iterating over the file:
        fpos = f.tell()
        line = f.readline()
        while line:
            (hex_id, svn_revnum, ignored) = line.split(" ", 2)
            symbol_id = int(hex_id, 16)
            if symbol_id != previous_id:
                Log().verbose(
                    ' ', Ctx()._symbol_db.get_symbol(symbol_id).name
                    )
                previous_id = symbol_id
                offsets[symbol_id] = fpos
            fpos = f.tell()
            line = f.readline()

        f.close()

        offsets_db = open(
            artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
        cPickle.dump(offsets, offsets_db, -1)
        offsets_db.close()

    def run(self, run_options, stats_keeper):
        """Generate the symbol offsets with the symbol database open."""

        Log().quiet("Determining offsets for all symbolic names...")
        Ctx()._projects = read_projects(
            artifact_manager.get_temp_file(config.PROJECTS)
            )
        Ctx()._symbol_db = SymbolDatabase()
        self.generate_offsets_for_symbolings()
        Ctx()._symbol_db.close()
        Log().quiet("Done.")
class OutputPass(Pass):
    """This pass was formerly known as pass8."""

    def register_artifacts(self):
        """Register the temporary files needed by this pass.

        The configured output option is then given the chance to
        register its own artifacts against this pass."""

        for needed in (
                config.PROJECTS,
                config.CVS_PATHS_DB,
                config.CVS_ITEMS_SORTED_STORE,
                config.CVS_ITEMS_SORTED_INDEX_TABLE,
                config.SYMBOL_DB,
                config.METADATA_CLEAN_INDEX_TABLE,
                config.METADATA_CLEAN_STORE,
                config.SVN_COMMITS_INDEX_TABLE,
                config.SVN_COMMITS_STORE,
                config.CVS_REVS_TO_SVN_REVNUMS,
                ):
            self._register_temp_file_needed(needed)
        Ctx().output_option.register_artifacts(self)

    def run(self, run_options, stats_keeper):
        """Output every stored SVNCommit through Ctx().output_option.

        Commits are fetched from the persistence manager by revision
        number, starting at 1, until a lookup returns a false value."""

        temp_file = artifact_manager.get_temp_file

        Ctx()._projects = read_projects(temp_file(config.PROJECTS))
        Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
        Ctx()._metadata_db = MetadataDatabase(
            temp_file(config.METADATA_CLEAN_STORE),
            temp_file(config.METADATA_CLEAN_INDEX_TABLE),
            DB_OPEN_READ,
            )
        Ctx()._cvs_items_db = IndexedCVSItemStore(
            temp_file(config.CVS_ITEMS_SORTED_STORE),
            temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
            DB_OPEN_READ,
            )
        Ctx()._symbol_db = SymbolDatabase()
        Ctx()._persistence_manager = PersistenceManager(DB_OPEN_READ)

        Ctx().output_option.setup(stats_keeper.svn_rev_count())

        svn_revnum = 1
        while True:
            svn_commit = Ctx()._persistence_manager.get_svn_commit(svn_revnum)
            if not svn_commit:
                break
            svn_commit.output(Ctx().output_option)
            svn_revnum += 1

        Ctx().output_option.cleanup()
        Ctx()._persistence_manager.close()

        Ctx()._symbol_db.close()
        Ctx()._cvs_items_db.close()
        Ctx()._metadata_db.close()
        Ctx()._cvs_path_db.close()
1755 # The list of passes constituting a run of cvs2svn:
1758 CleanMetadataPass(),
1759 CollateSymbolsPass(),
1760 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1761 FilterSymbolsPass(),
1762 SortRevisionsPass(),
1764 InitializeChangesetsPass(),
1765 #CheckIndexedItemStoreDependenciesPass(
1766 # config.CVS_ITEMS_SORTED_STORE,
1767 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1768 BreakRevisionChangesetCyclesPass(),
1769 RevisionTopologicalSortPass(),
1770 BreakSymbolChangesetCyclesPass(),
1771 BreakAllChangesetCyclesPass(),
1772 TopologicalSortPass(),
1774 SortSymbolOpeningsClosingsPass(),