1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module defines the passes that make up a conversion."""
import sys
import os
import shutil
import cPickle

from cvs2svn_lib import config
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.common import warning_prefix
from cvs2svn_lib.common import FatalException
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import DB_OPEN_WRITE
from cvs2svn_lib.common import Timestamper
from cvs2svn_lib.log import Log
from cvs2svn_lib.pass_manager import Pass
from cvs2svn_lib.serializer import PrimedPickleSerializer
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.cvs_file_database import CVSFileDatabase
from cvs2svn_lib.metadata_database import MetadataDatabase
from cvs2svn_lib.project import read_projects
from cvs2svn_lib.project import write_projects
from cvs2svn_lib.symbol import LineOfDevelopment
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.symbol import Symbol
from cvs2svn_lib.symbol import Branch
from cvs2svn_lib.symbol import Tag
from cvs2svn_lib.symbol import ExcludedSymbol
from cvs2svn_lib.symbol_database import SymbolDatabase
from cvs2svn_lib.symbol_database import create_symbol_database
from cvs2svn_lib.symbol_statistics import SymbolPlanError
from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
from cvs2svn_lib.symbol_statistics import SymbolStatistics
from cvs2svn_lib.cvs_item import CVSRevision
from cvs2svn_lib.cvs_item import CVSSymbol
from cvs2svn_lib.cvs_item_database import OldCVSItemStore
from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
from cvs2svn_lib.cvs_item_database import cvs_item_primer
from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
from cvs2svn_lib.key_generator import KeyGenerator
from cvs2svn_lib.changeset import RevisionChangeset
from cvs2svn_lib.changeset import OrderedChangeset
from cvs2svn_lib.changeset import SymbolChangeset
from cvs2svn_lib.changeset import BranchChangeset
from cvs2svn_lib.changeset import create_symbol_changeset
from cvs2svn_lib.changeset_graph import ChangesetGraph
from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
from cvs2svn_lib.changeset_database import ChangesetDatabase
from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
from cvs2svn_lib.svn_commit import SVNRevisionCommit
from cvs2svn_lib.openings_closings import SymbolingsLogger
from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
from cvs2svn_lib.persistence_manager import PersistenceManager
from cvs2svn_lib.collect_data import CollectData
from cvs2svn_lib.process import call_command
from cvs2svn_lib.check_dependencies_pass \
    import CheckItemStoreDependenciesPass
from cvs2svn_lib.check_dependencies_pass \
    import CheckIndexedItemStoreDependenciesPass
def sort_file(infilename, outfilename, options=None):
  """Sort file INFILENAME, storing the results to OUTFILENAME.

  OPTIONS is an optional list of strings that are passed as additional
  options to the sort command.  Raise FatalError if the output file is
  missing afterwards or its size differs from the input's."""

  if options is None:
    # Use a fresh list per call instead of a shared mutable default:
    options = []

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
  # it to 'C':
  lc_all_tmp = os.environ.get('LC_ALL', None)
  os.environ['LC_ALL'] = 'C'
  try:
    # The -T option to sort has a nice side effect.  The Win32 sort is
    # case insensitive and cannot be used, and since it does not
    # understand the -T option and dies if we try to use it, there is no
    # risk that we use that sort by accident.
    # NOTE(review): the '-T' temp-directory argument is reconstructed as
    # Ctx().tmpdir -- confirm against the original source.
    command = [
        Ctx().sort_executable,
        '-T', Ctx().tmpdir,
        ] + options + [
        infilename,
        ]

    # Under Windows, the subprocess module uses the Win32
    # CreateProcess, which always looks in the Windows system32
    # directory before it looks in the directories listed in the PATH
    # environment variable.  Since the Windows sort.exe is in the
    # system32 directory it will always be chosen.  A simple
    # workaround is to launch the sort in a shell.  When the shell
    # (cmd.exe) searches it only examines the directories in the PATH
    # so putting the directory with GNU sort ahead of the Windows
    # system32 directory will cause GNU sort to be chosen.
    outfile = open(outfilename, 'w')
    try:
      call_command(
          command, stdout=outfile, shell=(sys.platform=='win32'))
    finally:
      # Close the handle even if the sort command fails:
      outfile.close()
  finally:
    # Restore the caller's LC_ALL setting:
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp

  # On some versions of Windows, os.system() does not return an error
  # if the command fails.  So add little consistency tests here that
  # the output file was created and has the right size:
  if not os.path.exists(outfilename):
    raise FatalError('Sort output file missing: %r' % (outfilename,))

  if os.path.getsize(outfilename) != os.path.getsize(infilename):
    raise FatalError(
        'Sort input and output file sizes differ:\n'
        '    %r: %d bytes\n'
        '    %r: %d bytes' % (
            infilename, os.path.getsize(infilename),
            outfilename, os.path.getsize(outfilename),
            )
        )
class CollectRevsPass(Pass):
  """This pass was formerly known as pass1.

  Walk through all CVS ',v' files, recording every revision and symbol
  into the temporary databases used by the later passes."""

  def register_artifacts(self):
    # Artifacts produced by this pass:
    self._register_temp_file(config.PROJECTS)
    self._register_temp_file(config.SYMBOL_STATISTICS)
    self._register_temp_file(config.METADATA_INDEX_TABLE)
    self._register_temp_file(config.METADATA_STORE)
    self._register_temp_file(config.CVS_FILES_DB)
    self._register_temp_file(config.CVS_ITEMS_STORE)
    # The revision recorder may need artifacts of its own:
    Ctx().revision_recorder.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Log().quiet("Examining all CVS ',v' files...")
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_NEW)
    cd = CollectData(Ctx().revision_recorder, stats_keeper)
    for project in run_options.projects:
      cd.process_project(project)
    # The projects are no longer needed on run_options; they are
    # persisted to the PROJECTS artifact below:
    run_options.projects = None

    # close() returns a list of fatal-error messages accumulated while
    # scanning the repository:
    fatal_errors = cd.close()

    if fatal_errors:
      raise FatalException("Pass 1 complete.\n"
                           + "=" * 75 + "\n"
                           + "Error summary:\n"
                           + "\n".join(fatal_errors) + "\n"
                           + "Exited due to fatal error(s).")

    Ctx()._cvs_file_db.close()
    write_projects(artifact_manager.get_temp_file(config.PROJECTS))
180 class CleanMetadataPass(Pass
):
181 """Clean up CVS revision metadata and write it to a new database."""
183 def register_artifacts(self
):
184 self
._register
_temp
_file
(config
.METADATA_CLEAN_INDEX_TABLE
)
185 self
._register
_temp
_file
(config
.METADATA_CLEAN_STORE
)
186 self
._register
_temp
_file
_needed
(config
.METADATA_INDEX_TABLE
)
187 self
._register
_temp
_file
_needed
(config
.METADATA_STORE
)
189 def _get_clean_author(self
, author
):
190 """Return AUTHOR, converted appropriately to UTF8.
192 Raise a UnicodeException if it cannot be converted using the
193 configured cvs_author_decoder."""
196 return self
._authors
[author
]
201 clean_author
= Ctx().cvs_author_decoder(author
)
203 self
._authors
[author
] = author
204 raise UnicodeError('Problem decoding author \'%s\'' % (author
,))
207 clean_author
= clean_author
.encode('utf8')
209 self
._authors
[author
] = author
210 raise UnicodeError('Problem encoding author \'%s\'' % (author
,))
212 self
._authors
[author
] = clean_author
215 def _get_clean_log_msg(self
, log_msg
):
216 """Return LOG_MSG, converted appropriately to UTF8.
218 Raise a UnicodeException if it cannot be converted using the
219 configured cvs_log_decoder."""
222 clean_log_msg
= Ctx().cvs_log_decoder(log_msg
)
225 'Problem decoding log message:\n'
229 % ('-' * 75, log_msg
, '-' * 75,)
233 return clean_log_msg
.encode('utf8')
236 'Problem encoding log message:\n'
240 % ('-' * 75, log_msg
, '-' * 75,)
243 def _clean_metadata(self
, metadata
):
244 """Clean up METADATA by overwriting its members as necessary."""
247 metadata
.author
= self
._get
_clean
_author
(metadata
.author
)
248 except UnicodeError, e
:
249 Log().warn('%s: %s' % (warning_prefix
, e
,))
253 metadata
.log_msg
= self
._get
_clean
_log
_msg
(metadata
.log_msg
)
254 except UnicodeError, e
:
255 Log().warn('%s: %s' % (warning_prefix
, e
,))
258 def run(self
, run_options
, stats_keeper
):
259 Log().quiet("Converting metadata to UTF8...")
260 metadata_db
= MetadataDatabase(
261 artifact_manager
.get_temp_file(config
.METADATA_STORE
),
262 artifact_manager
.get_temp_file(config
.METADATA_INDEX_TABLE
),
265 metadata_clean_db
= MetadataDatabase(
266 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_STORE
),
267 artifact_manager
.get_temp_file(config
.METADATA_CLEAN_INDEX_TABLE
),
271 self
.warnings
= False
273 # A map {author : clean_author} for those known (to avoid
274 # repeating warnings):
277 for id in metadata_db
.iterkeys():
278 metadata
= metadata_db
[id]
280 # Record the original author name because it might be needed for
281 # expanding CVS keywords:
282 metadata
.original_author
= metadata
.author
284 self
._clean
_metadata
(metadata
)
286 metadata_clean_db
[id] = metadata
290 'There were warnings converting author names and/or log messages\n'
291 'to Unicode (see messages above). Please restart this pass\n'
292 'with one or more \'--encoding\' parameters or with\n'
293 '\'--fallback-encoding\'.'
296 metadata_clean_db
.close()
301 class CollateSymbolsPass(Pass
):
302 """Divide symbols into branches, tags, and excludes."""
308 ExcludedSymbol
: 'exclude',
312 def register_artifacts(self
):
313 self
._register
_temp
_file
(config
.SYMBOL_DB
)
314 self
._register
_temp
_file
_needed
(config
.PROJECTS
)
315 self
._register
_temp
_file
_needed
(config
.SYMBOL_STATISTICS
)
317 def get_symbol(self
, run_options
, stats
):
318 """Use StrategyRules to decide what to do with a symbol.
320 STATS is an instance of symbol_statistics._Stats describing an
321 instance of Symbol or Trunk. To determine how the symbol is to be
322 converted, consult the StrategyRules in the project's
323 symbol_strategy_rules. Each rule is allowed a chance to change
324 the way the symbol will be converted. If the symbol is not a
325 Trunk or TypedSymbol after all rules have run, raise
326 IndeterminateSymbolException."""
329 rules
= run_options
.project_symbol_strategy_rules
[symbol
.project
.id]
331 symbol
= rule
.get_symbol(symbol
, stats
)
332 assert symbol
is not None
334 stats
.check_valid(symbol
)
338 def log_symbol_summary(self
, stats
, symbol
):
339 if not self
.symbol_info_file
:
342 if isinstance(symbol
, Trunk
):
344 preferred_parent_name
= '.'
346 name
= stats
.lod
.name
347 if symbol
.preferred_parent_id
is None:
348 preferred_parent_name
= '.'
350 preferred_parent
= self
.symbol_stats
[symbol
.preferred_parent_id
].lod
351 if isinstance(preferred_parent
, Trunk
):
352 preferred_parent_name
= '.trunk.'
354 preferred_parent_name
= preferred_parent
.name
356 if isinstance(symbol
, LineOfDevelopment
) and symbol
.base_path
:
357 symbol_path
= symbol
.base_path
361 self
.symbol_info_file
.write(
362 '%-5d %-30s %-10s %s %s\n' % (
363 stats
.lod
.project
.id,
365 self
.conversion_names
[symbol
.__class
__],
367 preferred_parent_name
,
370 self
.symbol_info_file
.write(' # %s\n' % (stats
,))
371 parent_counts
= stats
.possible_parents
.items()
373 self
.symbol_info_file
.write(' # Possible parents:\n')
374 parent_counts
.sort(lambda a
,b
: cmp((b
[1], a
[0]), (a
[1], b
[0])))
375 for (pp
, count
) in parent_counts
:
376 if isinstance(pp
, Trunk
):
377 self
.symbol_info_file
.write(
378 ' # .trunk. : %d\n' % (count
,)
381 self
.symbol_info_file
.write(
382 ' # %s : %d\n' % (pp
.name
, count
,)
385 def get_symbols(self
, run_options
):
386 """Return a map telling how to convert symbols.
388 The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
389 indicating how each symbol should be converted. Trunk objects in
390 SYMBOL_STATS are passed through unchanged. One object is included
391 in the return value for each line of development described in
394 Raise FatalError if there was an error."""
399 if Ctx().symbol_info_filename
is not None:
400 self
.symbol_info_file
= open(Ctx().symbol_info_filename
, 'w')
401 self
.symbol_info_file
.write(
402 '# Columns: project_id symbol_name conversion symbol_path '
403 'preferred_parent_name\n'
406 self
.symbol_info_file
= None
408 # Initialize each symbol strategy rule a single time, even if it
409 # is used in more than one project. First define a map from
410 # object id to symbol strategy rule:
412 for rule_list
in run_options
.project_symbol_strategy_rules
:
413 for rule
in rule_list
:
414 rules
[id(rule
)] = rule
416 for rule
in rules
.itervalues():
417 rule
.start(self
.symbol_stats
)
421 for stats
in self
.symbol_stats
:
423 symbol
= self
.get_symbol(run_options
, stats
)
424 except IndeterminateSymbolException
, e
:
425 self
.log_symbol_summary(stats
, stats
.lod
)
426 mismatches
.append(e
.stats
)
427 except SymbolPlanError
, e
:
428 self
.log_symbol_summary(stats
, stats
.lod
)
431 self
.log_symbol_summary(stats
, symbol
)
432 retval
[stats
.lod
] = symbol
434 for rule
in rules
.itervalues():
437 if self
.symbol_info_file
:
438 self
.symbol_info_file
.close()
440 del self
.symbol_info_file
442 if errors
or mismatches
:
443 s
= ['Problems determining how symbols should be converted:\n']
445 s
.append('%s\n' % (e
,))
448 'It is not clear how the following symbols '
449 'should be converted.\n'
450 'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
452 '--symbol-default to resolve the ambiguity.\n'
454 for stats
in mismatches
:
455 s
.append(' %s\n' % (stats
,))
456 raise FatalError(''.join(s
))
460 def run(self
, run_options
, stats_keeper
):
461 Ctx()._projects
= read_projects(
462 artifact_manager
.get_temp_file(config
.PROJECTS
)
464 self
.symbol_stats
= SymbolStatistics(
465 artifact_manager
.get_temp_file(config
.SYMBOL_STATISTICS
)
468 symbol_map
= self
.get_symbols(run_options
)
470 # Check the symbols for consistency and bail out if there were errors:
471 self
.symbol_stats
.check_consistency(symbol_map
)
473 # Check that the symbols all have SVN paths set and that the paths
475 Ctx().output_option
.check_symbols(symbol_map
)
477 for symbol
in symbol_map
.itervalues():
478 if isinstance(symbol
, ExcludedSymbol
):
479 self
.symbol_stats
.exclude_symbol(symbol
)
481 create_symbol_database(symbol_map
.values())
483 del self
.symbol_stats
class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    self._register_temp_file(config.SUMMARY_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_SUMMARY_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    Ctx().revision_excluder.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))

    # Persist the serializer so that later passes can deserialize the
    # summary datafiles written below:
    cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
    f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'wb')
    cPickle.dump(cvs_item_serializer, f, -1)
    f.close()

    rev_db = NewSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE),
        cvs_item_serializer,
        )

    symbol_db = NewSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE),
        cvs_item_serializer,
        )

    revision_excluder = Ctx().revision_excluder

    Log().quiet("Filtering out excluded symbols and summarizing items...")

    stats_keeper.reset_cvs_rev_info()
    revision_excluder.start()

    # Process the cvs items store one file at a time:
    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      Log().verbose(cvs_file_items.cvs_file.filename)
      cvs_file_items.filter_excluded_symbols(revision_excluder)
      cvs_file_items.mutate_symbols()
      cvs_file_items.adjust_parents()
      cvs_file_items.refine_symbols()
      cvs_file_items.record_opened_symbols()
      cvs_file_items.record_closed_symbols()
      cvs_file_items.check_link_consistency()

      # Store whatever is left to the new file and update statistics:
      stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
      for cvs_item in cvs_file_items.values():
        stats_keeper.record_cvs_item(cvs_item)

        if isinstance(cvs_item, CVSRevision):
          rev_db.add(cvs_item)
        elif isinstance(cvs_item, CVSSymbol):
          symbol_db.add(cvs_item)

    stats_keeper.set_stats_reflect_exclude(True)

    rev_db.close()
    symbol_db.close()
    revision_excluder.finish()
    cvs_item_store.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()
class SortRevisionSummaryPass(Pass):
  """Sort the revision summary file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_SUMMARY_DATAFILE)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting CVS revision summaries...")
    # Delegate to the module-level sort_file() helper:
    sort_file(
        artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_REVS_SUMMARY_SORTED_DATAFILE))
class SortSymbolSummaryPass(Pass):
  """Sort the symbol summary file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_SUMMARY_DATAFILE)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting CVS symbol summaries...")
    # Delegate to the module-level sort_file() helper:
    sort_file(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE))
class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
    self._register_temp_file(config.CHANGESETS_STORE)
    self._register_temp_file(config.CHANGESETS_INDEX)
    self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.SUMMARY_SERIALIZER)
    self._register_temp_file_needed(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(
        config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)

  def get_revision_changesets(self):
    """Generate revision changesets, one at a time.

    Each time, yield a list of CVSRevisions that might potentially
    consititute a changeset."""

    # Create changesets for CVSRevisions:
    old_metadata_id = None
    old_timestamp = None
    changeset_items = []

    db = OldSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(
            config.CVS_REVS_SUMMARY_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_rev in db:
      if cvs_rev.metadata_id != old_metadata_id \
         or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_metadata_id = cvs_rev.metadata_id
      changeset_items.append(cvs_rev)
      old_timestamp = cvs_rev.timestamp

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  def get_symbol_changesets(self):
    """Generate symbol changesets, one at a time.

    Each time, yield a list of CVSSymbols that might potentially
    consititute a changeset."""

    old_symbol_id = None
    changeset_items = []

    db = OldSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_symbol in db:
      if cvs_symbol.symbol.id != old_symbol_id:
        # Start a new changeset.  First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_symbol_id = cvs_symbol.symbol.id
      changeset_items.append(cvs_symbol)

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  @staticmethod
  def compare_items(a, b):
    """Compare two CVSRevisions by time, then path, then revision number."""

    return (
        cmp(a.timestamp, b.timestamp)
        or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
        or cmp([int(x) for x in a.rev.split('.')],
               [int(x) for x in b.rev.split('.')])
        )

  def break_internal_dependencies(self, changeset_items):
    """Split up CHANGESET_ITEMS if necessary to break internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could possibly
    belong in a single RevisionChangeset, but there might be internal
    dependencies among the items.  Return a list of lists, where each
    sublist is a list of CVSRevisions and at least one internal
    dependency has been eliminated.  Iff CHANGESET_ITEMS does not have
    to be split, then the return value will contain a single value,
    namely the original value of CHANGESET_ITEMS.  Split
    CHANGESET_ITEMS at most once, even though the resulting changesets
    might themselves have internal dependencies."""

    # We only look for succ dependencies, since by doing so we
    # automatically cover pred dependencies as well.  First create a
    # list of tuples (pred, succ) of id pairs for CVSItems that depend
    # on each other:
    dependencies = []
    changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
    for cvs_item in changeset_items:
      for next_id in cvs_item.get_succ_ids():
        if next_id in changeset_cvs_item_ids:
          # Sanity check: a CVSItem should never depend on itself:
          if next_id == cvs_item.id:
            raise InternalError('Item depends on itself: %s' % (cvs_item,))

          dependencies.append((cvs_item.id, next_id,))

    if dependencies:
      # Sort the changeset_items in a defined order (chronological to the
      # extent that the timestamps are correct and unique).
      changeset_items.sort(self.compare_items)
      indexes = {}
      for (i, changeset_item) in enumerate(changeset_items):
        indexes[changeset_item.id] = i

      # How many internal dependencies would be broken by breaking the
      # Changeset after a particular index?
      breaks = [0] * len(changeset_items)
      for (pred, succ,) in dependencies:
        pred_index = indexes[pred]
        succ_index = indexes[succ]
        breaks[min(pred_index, succ_index)] += 1
        breaks[max(pred_index, succ_index)] -= 1
      # Convert the delta encoding into cumulative break counts:
      for i in range(1, len(breaks)):
        breaks[i] += breaks[i - 1]

      # Choose the break point that breaks the most dependencies,
      # preferring (on ties) the smallest timestamp gap:
      best_i = None
      best_count = -1
      best_time = 0
      for i in range(0, len(breaks) - 1):
        if breaks[i] > best_count:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)
        elif breaks[i] == best_count \
             and (changeset_items[i + 1].timestamp
                  - changeset_items[i].timestamp) < best_time:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)
      # Reuse the old changeset.id for the first of the split changesets.
      return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
    else:
      return [changeset_items]

  def break_all_internal_dependencies(self, changeset_items):
    """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
    be part of a single changeset.  Break this list into sublists,
    where the CVSRevisions in each sublist are free of mutual
    dependencies."""

    # This method is written non-recursively to avoid any possible
    # problems with recursion depth.

    changesets_to_split = [changeset_items]
    while changesets_to_split:
      changesets = self.break_internal_dependencies(changesets_to_split.pop())
      if len(changesets) == 1:
        [changeset_items] = changesets
        yield changeset_items
      else:
        # The changeset had to be split; see if either of the
        # fragments have to be split:
        changesets_to_split.extend(changesets)

  def get_changesets(self):
    """Generate (Changeset, [CVSItem,...]) for all changesets.

    The Changesets already have their internal dependencies broken.
    The [CVSItem,...] list is the list of CVSItems in the
    corresponding Changeset."""

    for changeset_items in self.get_revision_changesets():
      for split_changeset_items \
              in self.break_all_internal_dependencies(changeset_items):
        yield (
            RevisionChangeset(
                self.changeset_key_generator.gen_id(),
                [cvs_rev.id for cvs_rev in split_changeset_items]
                ),
            split_changeset_items,
            )

    for changeset_items in self.get_symbol_changesets():
      yield (
          create_symbol_changeset(
              self.changeset_key_generator.gen_id(),
              changeset_items[0].symbol,
              [cvs_symbol.id for cvs_symbol in changeset_items]
              ),
          changeset_items,
          )

  def run(self, run_options, stats_keeper):
    Log().quiet("Creating preliminary commit sets...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    # Load the serializer that FilterSymbolsPass used to write the
    # summary datafiles:
    f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'rb')
    self.cvs_item_serializer = cPickle.load(f)
    f.close()

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_NEW,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        DB_OPEN_NEW,
        )

    self.sorted_cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_NEW,
        )

    self.changeset_key_generator = KeyGenerator()

    for (changeset, changeset_items) in self.get_changesets():
      if Log().is_on(Log.DEBUG):
        Log().debug(repr(changeset))
      changeset_db.store(changeset)
      for cvs_item in changeset_items:
        self.sorted_cvs_items_db.add(cvs_item)
        cvs_item_to_changeset_id[cvs_item.id] = changeset.id

    self.sorted_cvs_items_db.close()
    cvs_item_to_changeset_id.close()
    changeset_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    del self.cvs_item_serializer
class ProcessedChangesetLogger:
  """Accumulate processed changeset ids and log them in batches.

  Ids are only collected when DEBUG logging is enabled."""

  def __init__(self):
    # Ids consumed since the last flush():
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    if Log().is_on(Log.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    """Emit any pending ids as a single debug message, then clear them."""

    if self.processed_changeset_ids:
      Log().debug(
          'Consumed changeset ids %s'
          % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))

      del self.processed_changeset_ids[:]
class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_STORE)
    self._register_temp_file_needed(config.CHANGESETS_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)

  def get_source_changesets(self):
    """Yield every changeset from the previous pass's database."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_READ,
        )

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Find the best link to break (smallest per ChangesetGraphLink's
    # ordering):
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking revision changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ,
        )

    # Seed the REVBROKEN table with a copy of the previous pass's
    # table, then open it for update:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        DB_OPEN_WRITE,
        )

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_NEW,
        )

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, RevisionChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # New changesets created while breaking cycles get fresh ids:
    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()
class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  Also convert them to OrderedChangesets, without changing their ids."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate OrderedChangesets in commit order.

    Feed the RevisionChangesets from the REVBROKEN stores through a
    ChangesetGraph and emit each one as an OrderedChangeset that knows
    its ordinal and its predecessor/successor changeset ids."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN
                ),
            DB_OPEN_READ,
            )
        )

    for changeset in self.get_source_changesets(changeset_db):
      if isinstance(changeset, RevisionChangeset):
        changeset_graph.add_changeset(changeset)

    # Collect ids in topological order, bracketed by None sentinels so
    # the first ordered changeset has no predecessor and the last has
    # no successor:
    changeset_ids = [None]

    for (changeset, time_range) in changeset_graph.consume_graph():
      changeset_ids.append(changeset.id)

    changeset_ids.append(None)

    for i in range(1, len(changeset_ids) - 1):
      changeset = changeset_db[changeset_ids[i]]
      yield OrderedChangeset(
          changeset.id, changeset.cvs_item_ids, i - 1,
          changeset_ids[i - 1], changeset_ids[i + 1])

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    changesets_revordered_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW)

    for changeset in self.get_changesets():
      changesets_revordered_db.store(changeset)

    changesets_revordered_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()
class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    """Generate the changesets from the REVSORTED stores."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    for changeset_id in old_changeset_db.keys():
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle.  Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()

    # Evaluate every link around the cycle and remember the cheapest
    # one to break (ChangesetGraphLink ordering defines "cheapest"):
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the SYMBROKEN mapping from the REVBROKEN one, then open it
    # writable so broken-up changesets can be recorded:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    # Only SymbolChangesets participate in the graph; remember the
    # largest id so split changesets get fresh ids:
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()
class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    """Generate the changesets from the SYMBROKEN stores."""

    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    for changeset_id in old_changeset_db.keys():
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde.  Split it into non-retrograde changesets."""

    Log().debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      # For each item, find the ordinal range within which it may be
      # committed (missing predecessors count as 0, missing successors
      # as "infinity"):
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

    # Partition the items: anything whose predecessors reach past the
    # earliest successor must go into the "late" half:
    early_item_ids = []
    late_item_ids = []
    for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(id)
      else:
        early_item_ids.append(id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    """Split the changeset if it is retrograde; return whether it was split."""

    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[id]
        for id in node.pred_ids
        if id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[id]
        for id in node.succ_ids
        if id in self.ordinals
        ]
    succ_ordinals.sort()
    # Retrograde: some predecessor is ordered at or after some successor.
    if pred_ordinals and succ_ordinals \
           and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    # Find the cheapest interior link to break:
    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    . Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle.  Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    # Seed the ALLBROKEN mapping from the SYMBROKEN one, then open it
    # writable so broken-up changesets can be recorded:
    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      id = ordered_changeset_map[ordinal]
      ordered_changesets.append(id)

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed.  BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
      if path:
        if Log().is_on(Log.DEBUG):
          Log().debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if Log().is_on(Log.DEBUG):
          Log().debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None
class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    # Output of this pass:
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    """Generate every changeset stored in CHANGESET_DB."""

    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            )
        )
    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    # One "id timestamp" record per line, both in hexadecimal:
    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()
class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    # Outputs of this pass:
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset,timestamp,) tuples in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    # Each line of the datafile is "id timestamp", both hexadecimal
    # (written by TopologicalSortPass):
    for line in file(
            artifact_manager.get_temp_file(
                config.CHANGESETS_SORTED_DATAFILE)):
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    Log().normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    Log().quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()
class SortSymbolsPass(Pass):
  """This pass was formerly known as pass6."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting symbolic name source revisions...")

    # Sort on symbol id (field 1), then numerically on revnum (field
    # 2), then the rest of the record:
    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        options=['-k', '1,1', '-k', '2,2n', '-k', '3'],
        )
class IndexSymbolsPass(Pass):
  """This pass was formerly known as pass7."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    # Map {symbol id : offset of its first record}:
    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    # The input is sorted by symbol id, so a change of id marks the
    # first record for that symbol; remember the offset where the
    # record started.
    old_id = None
    while True:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
      id, svn_revnum, ignored = line.split(" ", 2)
      id = int(id, 16)
      if id != old_id:
        Log().verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
        old_id = id
        offsets[id] = fpos
    f.close()

    offsets_db = file(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    cPickle.dump(offsets, offsets_db, -1)
    offsets_db.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    Log().quiet("Done.")
class OutputPass(Pass):
  """This pass was formerly known as pass8."""

  def register_artifacts(self):
    # Inputs required from earlier passes:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    # The output option may need artifacts of its own:
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx().persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

    # Walk the SVNCommits in revision-number order, emitting each one
    # through the configured output option.  NOTE(review): the loop
    # structure around the two get_svn_commit() calls was partially
    # lost in this copy; reconstructed as fetch-ahead iteration that
    # stops when get_svn_commit() returns a false value — confirm
    # against upstream.
    svn_revnum = 1
    svn_commit = Ctx().persistence_manager.get_svn_commit(svn_revnum)
    while svn_commit:
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1
      svn_commit = Ctx().persistence_manager.get_svn_commit(svn_revnum)

    Ctx().output_option.cleanup()
    Ctx().persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_file_db.close()
1801 # The list of passes constituting a run of cvs2svn:
1804 CleanMetadataPass(),
1805 CollateSymbolsPass(),
1806 #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
1807 FilterSymbolsPass(),
1808 SortRevisionSummaryPass(),
1809 SortSymbolSummaryPass(),
1810 InitializeChangesetsPass(),
1811 #CheckIndexedItemStoreDependenciesPass(
1812 # config.CVS_ITEMS_SORTED_STORE,
1813 # config.CVS_ITEMS_SORTED_INDEX_TABLE),
1814 BreakRevisionChangesetCyclesPass(),
1815 RevisionTopologicalSortPass(),
1816 BreakSymbolChangesetCyclesPass(),
1817 BreakAllChangesetCyclesPass(),
1818 TopologicalSortPass(),