# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2009 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module defines the passes that make up a conversion."""


import sys
import os
import shutil
import cPickle

from cvs2svn_lib import config
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.common import warning_prefix
from cvs2svn_lib.common import FatalException
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import DB_OPEN_WRITE
from cvs2svn_lib.common import Timestamper
from cvs2svn_lib.log import Log
from cvs2svn_lib.pass_manager import Pass
from cvs2svn_lib.serializer import PrimedPickleSerializer
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.cvs_file_database import CVSFileDatabase
from cvs2svn_lib.metadata_database import MetadataDatabase
from cvs2svn_lib.project import read_projects
from cvs2svn_lib.project import write_projects
from cvs2svn_lib.symbol import LineOfDevelopment
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.symbol import Symbol
from cvs2svn_lib.symbol import Branch
from cvs2svn_lib.symbol import Tag
from cvs2svn_lib.symbol import ExcludedSymbol
from cvs2svn_lib.symbol_database import SymbolDatabase
from cvs2svn_lib.symbol_database import create_symbol_database
from cvs2svn_lib.symbol_statistics import SymbolPlanError
from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException
from cvs2svn_lib.symbol_statistics import SymbolStatistics
from cvs2svn_lib.cvs_item import CVSRevision
from cvs2svn_lib.cvs_item import CVSSymbol
from cvs2svn_lib.cvs_item_database import OldCVSItemStore
from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
from cvs2svn_lib.cvs_item_database import cvs_item_primer
from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase
from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase
from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase
from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase
from cvs2svn_lib.key_generator import KeyGenerator
from cvs2svn_lib.changeset import RevisionChangeset
from cvs2svn_lib.changeset import OrderedChangeset
from cvs2svn_lib.changeset import SymbolChangeset
from cvs2svn_lib.changeset import BranchChangeset
from cvs2svn_lib.changeset import create_symbol_changeset
from cvs2svn_lib.changeset_graph import ChangesetGraph
from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink
from cvs2svn_lib.changeset_database import ChangesetDatabase
from cvs2svn_lib.changeset_database import CVSItemToChangesetTable
from cvs2svn_lib.svn_commit import SVNRevisionCommit
from cvs2svn_lib.openings_closings import SymbolingsLogger
from cvs2svn_lib.svn_commit_creator import SVNCommitCreator
from cvs2svn_lib.persistence_manager import PersistenceManager
from cvs2svn_lib.collect_data import CollectData
from cvs2svn_lib.process import call_command
from cvs2svn_lib.check_dependencies_pass \
    import CheckItemStoreDependenciesPass
from cvs2svn_lib.check_dependencies_pass \
    import CheckIndexedItemStoreDependenciesPass


def sort_file(infilename, outfilename, options=[]):
  """Sort file INFILENAME, storing the results to OUTFILENAME.

  OPTIONS is an optional list of strings that are passed as additional
  options to the sort command."""

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
  # it to 'C'
  lc_all_tmp = os.environ.get('LC_ALL', None)
  os.environ['LC_ALL'] = 'C'

  # The -T option to sort has a nice side effect. The Win32 sort is
  # case insensitive and cannot be used, and since it does not
  # understand the -T option and dies if we try to use it, there is no
  # risk that we use that sort by accident.
  command = [
      Ctx().sort_executable,
      '-T', Ctx().tmpdir
      ] + options + [
      infilename
      ]

  try:
    # Under Windows, the subprocess module uses the Win32
    # CreateProcess, which always looks in the Windows system32
    # directory before it looks in the directories listed in the PATH
    # environment variable. Since the Windows sort.exe is in the
    # system32 directory it will always be chosen. A simple
    # workaround is to launch the sort in a shell. When the shell
    # (cmd.exe) searches it only examines the directories in the PATH
    # so putting the directory with GNU sort ahead of the Windows
    # system32 directory will cause GNU sort to be chosen.
    call_command(
        command, stdout=open(outfilename, 'w'), shell=(sys.platform=='win32')
        )
  finally:
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp

  # On some versions of Windows, os.system() does not return an error
  # if the command fails. So add little consistency tests here that
  # the output file was created and has the right size:

  if not os.path.exists(outfilename):
    raise FatalError('Sort output file missing: %r' % (outfilename,))

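  # Sorting only reorders lines, so a correct sort should leave the
  # total file size unchanged: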
  if os.path.getsize(outfilename) != os.path.getsize(infilename):
    raise FatalError(
        'Sort input and output file sizes differ:\n'
        ' %r (%d bytes)\n'
        ' %r (%d bytes)' % (
            infilename, os.path.getsize(infilename),
            outfilename, os.path.getsize(outfilename),
            )
        )


class CollectRevsPass(Pass):
  """This pass was formerly known as pass1."""

  def register_artifacts(self):
    self._register_temp_file(config.PROJECTS)
    self._register_temp_file(config.SYMBOL_STATISTICS)
    self._register_temp_file(config.METADATA_INDEX_TABLE)
    self._register_temp_file(config.METADATA_STORE)
    self._register_temp_file(config.CVS_FILES_DB)
    self._register_temp_file(config.CVS_ITEMS_STORE)
    Ctx().revision_recorder.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Log().quiet("Examining all CVS ',v' files...")
    Ctx()._projects = {}
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_NEW)
    cd = CollectData(Ctx().revision_recorder, stats_keeper)
    for project in run_options.projects:
      cd.process_project(project)
    run_options.projects = None

    fatal_errors = cd.close()

    if fatal_errors:
      raise FatalException("Pass 1 complete.\n"
                           + "=" * 75 + "\n"
                           + "Error summary:\n"
                           + "\n".join(fatal_errors) + "\n"
                           + "Exited due to fatal error(s).")

    Ctx()._cvs_file_db.close()
    write_projects(artifact_manager.get_temp_file(config.PROJECTS))
    Log().quiet("Done")


class CleanMetadataPass(Pass):
  """Clean up CVS revision metadata and write it to a new database."""

  def register_artifacts(self):
    self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.METADATA_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_STORE)

  def _get_clean_author(self, author):
    """Return AUTHOR, converted appropriately to UTF8.

    Raise a UnicodeError if it cannot be converted using the
    configured cvs_author_decoder."""

    try:
      return self._authors[author]
    except KeyError:
      pass

    try:
      clean_author = Ctx().cvs_author_decoder(author)
    except UnicodeError:
      self._authors[author] = author
      raise UnicodeError('Problem decoding author \'%s\'' % (author,))

    try:
      clean_author = clean_author.encode('utf8')
    except UnicodeError:
      self._authors[author] = author
      raise UnicodeError('Problem encoding author \'%s\'' % (author,))

    self._authors[author] = clean_author
    return clean_author

  def _get_clean_log_msg(self, log_msg):
    """Return LOG_MSG, converted appropriately to UTF8.

    Raise a UnicodeError if it cannot be converted using the
    configured cvs_log_decoder."""

    try:
      clean_log_msg = Ctx().cvs_log_decoder(log_msg)
    except UnicodeError:
      raise UnicodeError(
          'Problem decoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

    try:
      return clean_log_msg.encode('utf8')
    except UnicodeError:
      raise UnicodeError(
          'Problem encoding log message:\n'
          '%s\n'
          '%s\n'
          '%s'
          % ('-' * 75, log_msg, '-' * 75,)
          )

  def _clean_metadata(self, metadata):
    """Clean up METADATA by overwriting its members as necessary."""

    try:
      metadata.author = self._get_clean_author(metadata.author)
    except UnicodeError, e:
      Log().warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

    try:
      metadata.log_msg = self._get_clean_log_msg(metadata.log_msg)
    except UnicodeError, e:
      Log().warn('%s: %s' % (warning_prefix, e,))
      self.warnings = True

  def run(self, run_options, stats_keeper):
    Log().quiet("Converting metadata to UTF8...")
    metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_STORE),
        artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
        DB_OPEN_READ,
        )
    metadata_clean_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_NEW,
        )

    self.warnings = False

    # A map {author : clean_author} for those known (to avoid
    # repeating warnings):
    self._authors = {}

    for id in metadata_db.iterkeys():
      metadata = metadata_db[id]

      # Record the original author name because it might be needed for
      # expanding CVS keywords:
      metadata.original_author = metadata.author

      self._clean_metadata(metadata)

      metadata_clean_db[id] = metadata

    if self.warnings:
      raise FatalError(
          'There were warnings converting author names and/or log messages\n'
          'to Unicode (see messages above). Please restart this pass\n'
          'with one or more \'--encoding\' parameters or with\n'
          '\'--fallback-encoding\'.'
          )

    metadata_clean_db.close()
    metadata_db.close()
    Log().quiet("Done")


class CollateSymbolsPass(Pass):
  """Divide symbols into branches, tags, and excludes."""

  conversion_names = {
      Trunk : 'trunk',
      Branch : 'branch',
      Tag : 'tag',
      ExcludedSymbol : 'exclude',
      Symbol : '.',
      }

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_STATISTICS)

  def get_symbol(self, run_options, stats):
    """Use StrategyRules to decide what to do with a symbol.

    STATS is an instance of symbol_statistics._Stats describing an
    instance of Symbol or Trunk. To determine how the symbol is to be
    converted, consult the StrategyRules in the project's
    symbol_strategy_rules. Each rule is allowed a chance to change
    the way the symbol will be converted. If the symbol is not a
    Trunk or TypedSymbol after all rules have run, raise
    IndeterminateSymbolException."""

    symbol = stats.lod
    rules = run_options.project_symbol_strategy_rules[symbol.project.id]
    for rule in rules:
      symbol = rule.get_symbol(symbol, stats)
      assert symbol is not None

    stats.check_valid(symbol)

    return symbol

  def log_symbol_summary(self, stats, symbol):
    if not self.symbol_info_file:
      return

    if isinstance(symbol, Trunk):
      name = '.trunk.'
      preferred_parent_name = '.'
    else:
      name = stats.lod.name
      if symbol.preferred_parent_id is None:
        preferred_parent_name = '.'
      else:
        preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod
        if isinstance(preferred_parent, Trunk):
          preferred_parent_name = '.trunk.'
        else:
          preferred_parent_name = preferred_parent.name

    if isinstance(symbol, LineOfDevelopment) and symbol.base_path:
      symbol_path = symbol.base_path
    else:
      symbol_path = '.'

    self.symbol_info_file.write(
        '%-5d %-30s %-10s %s %s\n' % (
            stats.lod.project.id,
            name,
            self.conversion_names[symbol.__class__],
            symbol_path,
            preferred_parent_name,
            )
        )
    self.symbol_info_file.write(' # %s\n' % (stats,))
    parent_counts = stats.possible_parents.items()
    if parent_counts:
      self.symbol_info_file.write(' # Possible parents:\n')
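      # List the most common potential parents first: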
      parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0])))
      for (pp, count) in parent_counts:
        if isinstance(pp, Trunk):
          self.symbol_info_file.write(
              ' # .trunk. : %d\n' % (count,)
              )
        else:
          self.symbol_info_file.write(
              ' # %s : %d\n' % (pp.name, count,)
              )

  def get_symbols(self, run_options):
    """Return a map telling how to convert symbols.

    The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)},
    indicating how each symbol should be converted. Trunk objects in
    SYMBOL_STATS are passed through unchanged. One object is included
    in the return value for each line of development described in
    SYMBOL_STATS.

    Raise FatalError if there was an error."""

    errors = []
    mismatches = []

    if Ctx().symbol_info_filename is not None:
      self.symbol_info_file = open(Ctx().symbol_info_filename, 'w')
      self.symbol_info_file.write(
          '# Columns: project_id symbol_name conversion symbol_path '
          'preferred_parent_name\n'
          )
    else:
      self.symbol_info_file = None

    # Initialize each symbol strategy rule a single time, even if it
    # is used in more than one project. First define a map from
    # object id to symbol strategy rule:
    rules = {}
    for rule_list in run_options.project_symbol_strategy_rules:
      for rule in rule_list:
        rules[id(rule)] = rule

    for rule in rules.itervalues():
      rule.start(self.symbol_stats)

    retval = {}

    for stats in self.symbol_stats:
      try:
        symbol = self.get_symbol(run_options, stats)
      except IndeterminateSymbolException, e:
        self.log_symbol_summary(stats, stats.lod)
        mismatches.append(e.stats)
      except SymbolPlanError, e:
        self.log_symbol_summary(stats, stats.lod)
        errors.append(e)
      else:
        self.log_symbol_summary(stats, symbol)
        retval[stats.lod] = symbol

    for rule in rules.itervalues():
      rule.finish()

    if self.symbol_info_file:
      self.symbol_info_file.close()

    del self.symbol_info_file

    if errors or mismatches:
      s = ['Problems determining how symbols should be converted:\n']
      for e in errors:
        s.append('%s\n' % (e,))
      if mismatches:
        s.append(
            'It is not clear how the following symbols '
            'should be converted.\n'
            'Use --symbol-hints, --force-tag, --force-branch, --exclude, '
            'and/or\n'
            '--symbol-default to resolve the ambiguity.\n'
            )
        for stats in mismatches:
          s.append(' %s\n' % (stats,))
      raise FatalError(''.join(s))
    else:
      return retval

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    self.symbol_stats = SymbolStatistics(
        artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
        )

    symbol_map = self.get_symbols(run_options)

    # Check the symbols for consistency and bail out if there were errors:
    self.symbol_stats.check_consistency(symbol_map)

    # Check that the symbols all have SVN paths set and that the paths
    # are disjoint:
    Ctx().output_option.check_symbols(symbol_map)

    for symbol in symbol_map.itervalues():
      if isinstance(symbol, ExcludedSymbol):
        self.symbol_stats.exclude_symbol(symbol)

    create_symbol_database(symbol_map.values())

    del self.symbol_stats

    Log().quiet("Done")


class FilterSymbolsPass(Pass):
  """Delete any branches/tags that are to be excluded.

  Also delete revisions on excluded branches, and delete other
  references to the excluded symbols."""

  def register_artifacts(self):
    self._register_temp_file(config.SUMMARY_SERIALIZER)
    self._register_temp_file(config.CVS_REVS_SUMMARY_DATAFILE)
    self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_STORE)
    Ctx().revision_excluder.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))

    cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer)
    f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'wb')
    cPickle.dump(cvs_item_serializer, f, -1)
    f.close()

    rev_db = NewSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE),
        cvs_item_serializer,
        )

    symbol_db = NewSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE),
        cvs_item_serializer,
        )

    revision_excluder = Ctx().revision_excluder

    Log().quiet("Filtering out excluded symbols and summarizing items...")

    stats_keeper.reset_cvs_rev_info()
    revision_excluder.start()

    # Process the cvs items store one file at a time:
    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      Log().verbose(cvs_file_items.cvs_file.filename)
      cvs_file_items.filter_excluded_symbols(revision_excluder)
      cvs_file_items.mutate_symbols()
      cvs_file_items.adjust_parents()
      cvs_file_items.refine_symbols()
      cvs_file_items.record_opened_symbols()
      cvs_file_items.record_closed_symbols()
      cvs_file_items.check_link_consistency()

      # Store whatever is left to the new file and update statistics:
      stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
      for cvs_item in cvs_file_items.values():
        stats_keeper.record_cvs_item(cvs_item)

        if isinstance(cvs_item, CVSRevision):
          rev_db.add(cvs_item)
        elif isinstance(cvs_item, CVSSymbol):
          symbol_db.add(cvs_item)

    stats_keeper.set_stats_reflect_exclude(True)

    rev_db.close()
    symbol_db.close()
    revision_excluder.finish()
    cvs_item_store.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")


class SortRevisionSummaryPass(Pass):
  """Sort the revision summary file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_REVS_SUMMARY_DATAFILE)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting CVS revision summaries...")
    sort_file(
        artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_REVS_SUMMARY_SORTED_DATAFILE))
    Log().quiet("Done")


class SortSymbolSummaryPass(Pass):
  """Sort the symbol summary file."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(config.CVS_SYMBOLS_SUMMARY_DATAFILE)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting CVS symbol summaries...")
    sort_file(
        artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE),
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE))
    Log().quiet("Done")


class InitializeChangesetsPass(Pass):
  """Create preliminary CommitSets."""

  def register_artifacts(self):
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET)
    self._register_temp_file(config.CHANGESETS_STORE)
    self._register_temp_file(config.CHANGESETS_INDEX)
    self._register_temp_file(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.SUMMARY_SERIALIZER)
    self._register_temp_file_needed(config.CVS_REVS_SUMMARY_SORTED_DATAFILE)
    self._register_temp_file_needed(
        config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)

  def get_revision_changesets(self):
    """Generate revision changesets, one at a time.

    Each time, yield a list of CVSRevisions that might potentially
    constitute a changeset."""

    # Create changesets for CVSRevisions:
    old_metadata_id = None
    old_timestamp = None
    changeset_items = []

    db = OldSortableCVSRevisionDatabase(
        artifact_manager.get_temp_file(
            config.CVS_REVS_SUMMARY_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

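    # CVSRevisions are accumulated into one candidate changeset as long
    # as they share the same metadata id (author + log message) and each
    # falls within config.COMMIT_THRESHOLD seconds of the previous one: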
    for cvs_rev in db:
      if cvs_rev.metadata_id != old_metadata_id \
         or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD:
        # Start a new changeset. First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_metadata_id = cvs_rev.metadata_id
      changeset_items.append(cvs_rev)
      old_timestamp = cvs_rev.timestamp

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

  def get_symbol_changesets(self):
    """Generate symbol changesets, one at a time.

    Each time, yield a list of CVSSymbols that might potentially
    constitute a changeset."""

    old_symbol_id = None
    changeset_items = []

    db = OldSortableCVSSymbolDatabase(
        artifact_manager.get_temp_file(
            config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE
            ),
        self.cvs_item_serializer,
        )

    for cvs_symbol in db:
      if cvs_symbol.symbol.id != old_symbol_id:
        # Start a new changeset. First finish up the old changeset,
        # if any:
        if changeset_items:
          yield changeset_items
          changeset_items = []
        old_symbol_id = cvs_symbol.symbol.id
      changeset_items.append(cvs_symbol)

    # Finish up the last changeset, if any:
    if changeset_items:
      yield changeset_items

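  # compare_items() orders CVSRevisions chronologically; ties are broken
  # by CVS path, then by revision number (compared component-wise as
  # integers), then by id, so that the resulting order is deterministic.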
  @staticmethod
  def compare_items(a, b):
    return (
        cmp(a.timestamp, b.timestamp)
        or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path)
        or cmp([int(x) for x in a.rev.split('.')],
               [int(x) for x in b.rev.split('.')])
        or cmp(a.id, b.id))

  def break_internal_dependencies(self, changeset_items):
    """Split up CHANGESET_ITEMS if necessary to break internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could possibly
    belong in a single RevisionChangeset, but there might be internal
    dependencies among the items. Return a list of lists, where each
    sublist is a list of CVSRevisions and at least one internal
    dependency has been eliminated. Iff CHANGESET_ITEMS does not have
    to be split, then the return value will contain a single value,
    namely the original value of CHANGESET_ITEMS. Split
    CHANGESET_ITEMS at most once, even though the resulting changesets
    might themselves have internal dependencies."""

    # We only look for succ dependencies, since by doing so we
    # automatically cover pred dependencies as well. First create a
    # list of tuples (pred, succ) of id pairs for CVSItems that depend
    # on each other.
    dependencies = []
    changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items])
    for cvs_item in changeset_items:
      for next_id in cvs_item.get_succ_ids():
        if next_id in changeset_cvs_item_ids:
          # Sanity check: a CVSItem should never depend on itself:
          if next_id == cvs_item.id:
            raise InternalError('Item depends on itself: %s' % (cvs_item,))

          dependencies.append((cvs_item.id, next_id,))

    if dependencies:
      # Sort the changeset_items in a defined order (chronological to the
      # extent that the timestamps are correct and unique).
      changeset_items.sort(self.compare_items)
      indexes = {}
      for (i, changeset_item) in enumerate(changeset_items):
        indexes[changeset_item.id] = i
      # How many internal dependencies would be broken by breaking the
      # Changeset after a particular index?
      breaks = [0] * len(changeset_items)
      for (pred, succ,) in dependencies:
        pred_index = indexes[pred]
        succ_index = indexes[succ]
        breaks[min(pred_index, succ_index)] += 1
        breaks[max(pred_index, succ_index)] -= 1
      best_i = None
      best_count = -1
      best_time = 0
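      # Integrate the deltas: after this loop, breaks[i] is the number
      # of internal dependencies that would be severed by splitting the
      # changeset between items i and i+1: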
      for i in range(1, len(breaks)):
        breaks[i] += breaks[i - 1]
      for i in range(0, len(breaks) - 1):
        if breaks[i] > best_count:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)
        elif breaks[i] == best_count \
             and (changeset_items[i + 1].timestamp
                  - changeset_items[i].timestamp) < best_time:
          best_i = i
          best_count = breaks[i]
          best_time = (changeset_items[i + 1].timestamp
                       - changeset_items[i].timestamp)
      # Reuse the old changeset.id for the first of the split changesets.
      return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]]
    else:
      return [changeset_items]

  def break_all_internal_dependencies(self, changeset_items):
    """Keep breaking CHANGESET_ITEMS up to break all internal dependencies.

    CHANGESET_ITEMS is a list of CVSRevisions that could conceivably
    be part of a single changeset. Break this list into sublists,
    where the CVSRevisions in each sublist are free of mutual
    dependencies."""

    # This method is written non-recursively to avoid any possible
    # problems with recursion depth.

    changesets_to_split = [changeset_items]
    while changesets_to_split:
      changesets = self.break_internal_dependencies(changesets_to_split.pop())
      if len(changesets) == 1:
        [changeset_items] = changesets
        yield changeset_items
      else:
        # The changeset had to be split; see if either of the
        # fragments has to be split again:
        changesets.reverse()
        changesets_to_split.extend(changesets)

  def get_changesets(self):
    """Generate (Changeset, [CVSItem,...]) for all changesets.

    The Changesets already have their internal dependencies broken.
    The [CVSItem,...] list is the list of CVSItems in the
    corresponding Changeset."""

    for changeset_items in self.get_revision_changesets():
      for split_changeset_items \
              in self.break_all_internal_dependencies(changeset_items):
        yield (
            RevisionChangeset(
                self.changeset_key_generator.gen_id(),
                [cvs_rev.id for cvs_rev in split_changeset_items]
                ),
            split_changeset_items,
            )

    for changeset_items in self.get_symbol_changesets():
      yield (
          create_symbol_changeset(
              self.changeset_key_generator.gen_id(),
              changeset_items[0].symbol,
              [cvs_symbol.id for cvs_symbol in changeset_items]
              ),
          changeset_items,
          )

  def run(self, run_options, stats_keeper):
    Log().quiet("Creating preliminary commit sets...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()

    f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'rb')
    self.cvs_item_serializer = cPickle.load(f)
    f.close()

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_NEW,
        )
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET),
        DB_OPEN_NEW,
        )

    self.sorted_cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_NEW)

    self.changeset_key_generator = KeyGenerator()

    for (changeset, changeset_items) in self.get_changesets():
      if Log().is_on(Log.DEBUG):
        Log().debug(repr(changeset))
      changeset_db.store(changeset)
      for cvs_item in changeset_items:
        self.sorted_cvs_items_db.add(cvs_item)
        cvs_item_to_changeset_id[cvs_item.id] = changeset.id

    self.sorted_cvs_items_db.close()
    cvs_item_to_changeset_id.close()
    changeset_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    del self.cvs_item_serializer

    Log().quiet("Done")


class ProcessedChangesetLogger:
  def __init__(self):
    self.processed_changeset_ids = []

  def log(self, changeset_id):
    if Log().is_on(Log.DEBUG):
      self.processed_changeset_ids.append(changeset_id)

  def flush(self):
    if self.processed_changeset_ids:
      Log().debug(
          'Consumed changeset ids %s'
          % (', '.join(['%x' % id for id in self.processed_changeset_ids]),))

      del self.processed_changeset_ids[:]


class BreakRevisionChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only RevisionChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_STORE)
    self._register_temp_file_needed(config.CHANGESETS_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET)

  def get_source_changesets(self):
    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()
    del old_changeset_db

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle. Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    best_i = None
    best_link = None
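    # Examine every link in the cycle and remember the one that looks
    # most promising to break (ChangesetGraphLink defines the ordering
    # used by this comparison):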
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking revision changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, RevisionChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")


class RevisionTopologicalSortPass(Pass):
  """Sort RevisionChangesets into commit order.

  Also convert them to OrderedChangesets, without changing their ids."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self, changeset_db):
    changeset_ids = changeset_db.keys()

    for changeset_id in changeset_ids:
      yield changeset_db[changeset_id]

  def get_changesets(self):
    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX),
        DB_OPEN_READ,
        )

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_REVBROKEN
                ),
            DB_OPEN_READ,
            ),
        )

    for changeset in self.get_source_changesets(changeset_db):
      if isinstance(changeset, RevisionChangeset):
        changeset_graph.add_changeset(changeset)
      else:
        yield changeset

    changeset_ids = []

    # Sentry:
    changeset_ids.append(None)

    for (changeset, time_range) in changeset_graph.consume_graph():
      changeset_ids.append(changeset.id)

    # Sentry:
    changeset_ids.append(None)

    for i in range(1, len(changeset_ids) - 1):
      changeset = changeset_db[changeset_ids[i]]
      yield OrderedChangeset(
          changeset.id, changeset.cvs_item_ids, i - 1,
          changeset_ids[i - 1], changeset_ids[i + 1])

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    changesets_revordered_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_NEW)

    for changeset in self.get_changesets():
      changesets_revordered_db.store(changeset)

    changesets_revordered_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")


class BreakSymbolChangesetCyclesPass(Pass):
  """Break up any dependency cycles involving only SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE)
    self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN)

  def get_source_changesets(self):
    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def break_cycle(self, cycle):
    """Break up one or more changesets in CYCLE to help break the cycle.

    CYCLE is a list of Changesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress towards
    breaking the cycle. Update self.changeset_graph accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    self.processed_changeset_logger.flush()
    best_i = None
    best_link = None
    for i in range(len(cycle)):
      # It's OK if this index wraps to -1:
      link = ChangesetGraphLink(
          cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking symbol changeset dependency cycles...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_REVBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN))
    cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        DB_OPEN_WRITE)

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        changeset_db, cvs_item_to_changeset_id
        )

    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      changeset_db.store(changeset)
      if isinstance(changeset, SymbolChangeset):
        self.changeset_graph.add_changeset(changeset)
      max_changeset_id = max(max_changeset_id, changeset.id)

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    self.processed_changeset_logger = ProcessedChangesetLogger()

    # Consume the graph, breaking cycles using self.break_cycle():
    for (changeset, time_range) in self.changeset_graph.consume_graph(
          cycle_breaker=self.break_cycle
          ):
      self.processed_changeset_logger.log(changeset.id)

    self.processed_changeset_logger.flush()
    del self.processed_changeset_logger

    self.changeset_graph.close()
    self.changeset_graph = None
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")


class BreakAllChangesetCyclesPass(Pass):
  """Break up any dependency cycles that are closed by SymbolChangesets."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)

  def get_source_changesets(self):
    old_changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_ids = old_changeset_db.keys()

    for changeset_id in changeset_ids:
      yield old_changeset_db[changeset_id]

    old_changeset_db.close()

  def _split_retrograde_changeset(self, changeset):
    """CHANGESET is retrograde. Split it into non-retrograde changesets."""

    Log().debug('Breaking retrograde changeset %x' % (changeset.id,))

    self.changeset_graph.delete_changeset(changeset)

    # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) }
    ordinal_limits = {}
    for cvs_branch in changeset.iter_cvs_items():
      max_pred_ordinal = 0
      min_succ_ordinal = sys.maxint

      for pred_id in cvs_branch.get_pred_ids():
        pred_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[pred_id], 0)
        max_pred_ordinal = max(max_pred_ordinal, pred_ordinal)

      for succ_id in cvs_branch.get_succ_ids():
        succ_ordinal = self.ordinals.get(
            self.cvs_item_to_changeset_id[succ_id], sys.maxint)
        min_succ_ordinal = min(min_succ_ordinal, succ_ordinal)

      assert max_pred_ordinal < min_succ_ordinal
      ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,)

    # Find the earliest successor ordinal:
    min_min_succ_ordinal = sys.maxint
    for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values():
      min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal)

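    # Partition the items: any item whose latest predecessor falls at or
    # after the earliest successor ordinal has to go into the later
    # changeset; the rest can safely be committed earlier: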
    early_item_ids = []
    late_item_ids = []
    for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items():
      if max_pred_ordinal >= min_min_succ_ordinal:
        late_item_ids.append(id)
      else:
        early_item_ids.append(id)

    assert early_item_ids
    assert late_item_ids

    early_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), early_item_ids)
    late_changeset = changeset.create_split_changeset(
        self.changeset_key_generator.gen_id(), late_item_ids)

    self.changeset_graph.add_new_changeset(early_changeset)
    self.changeset_graph.add_new_changeset(late_changeset)

    early_split = self._split_if_retrograde(early_changeset.id)

    # Because of the way we constructed it, the early changeset should
    # not have to be split:
    assert not early_split

    self._split_if_retrograde(late_changeset.id)

  def _split_if_retrograde(self, changeset_id):
    node = self.changeset_graph[changeset_id]
    pred_ordinals = [
        self.ordinals[id]
        for id in node.pred_ids
        if id in self.ordinals
        ]
    pred_ordinals.sort()
    succ_ordinals = [
        self.ordinals[id]
        for id in node.succ_ids
        if id in self.ordinals
        ]
    succ_ordinals.sort()
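    # The changeset is retrograde if its latest predecessor would be
    # committed at or after its earliest successor: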
    if pred_ordinals and succ_ordinals \
       and pred_ordinals[-1] >= succ_ordinals[0]:
      self._split_retrograde_changeset(self.changeset_db[node.id])
      return True
    else:
      return False

  def break_segment(self, segment):
    """Break a changeset in SEGMENT[1:-1].

    The range SEGMENT[1:-1] is not empty, and all of the changesets in
    that range are SymbolChangesets."""

    best_i = None
    best_link = None
    for i in range(1, len(segment) - 1):
      link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1])

      if best_i is None or link < best_link:
        best_i = i
        best_link = link

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking segment %s by breaking node %x' % (
          ' -> '.join(['%x' % node.id for node in segment]),
          best_link.changeset.id,))

    new_changesets = best_link.break_changeset(self.changeset_key_generator)

    self.changeset_graph.delete_changeset(best_link.changeset)

    for changeset in new_changesets:
      self.changeset_graph.add_new_changeset(changeset)

  def break_cycle(self, cycle):
    """Break up one or more SymbolChangesets in CYCLE to help break the cycle.

    CYCLE is a list of SymbolChangesets where

        cycle[i] depends on cycle[i - 1]

    Break up one or more changesets in CYCLE to make progress
    towards breaking the cycle. Update self.changeset_graph
    accordingly.

    It is not guaranteed that the cycle will be broken by one call to
    this routine, but at least some progress must be made."""

    if Log().is_on(Log.DEBUG):
      Log().debug(
          'Breaking cycle %s' % (
          ' -> '.join(['%x' % changeset.id
                       for changeset in cycle + [cycle[0]]]),))

    # Unwrap the cycle into a segment then break the segment:
    self.break_segment([cycle[-1]] + cycle + [cycle[0]])

  def run(self, run_options, stats_keeper):
    Log().quiet("Breaking CVSSymbol dependency loops...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    shutil.copyfile(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_SYMBROKEN),
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN))
    self.cvs_item_to_changeset_id = CVSItemToChangesetTable(
        artifact_manager.get_temp_file(
            config.CVS_ITEM_TO_CHANGESET_ALLBROKEN),
        DB_OPEN_WRITE)

    self.changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_NEW)

    self.changeset_graph = ChangesetGraph(
        self.changeset_db, self.cvs_item_to_changeset_id
        )

    # A map {changeset_id : ordinal} for OrderedChangesets:
    self.ordinals = {}
    # A map {ordinal : changeset_id}:
    ordered_changeset_map = {}
    # A list of all BranchChangeset ids:
    branch_changeset_ids = []
    max_changeset_id = 0
    for changeset in self.get_source_changesets():
      self.changeset_db.store(changeset)
      self.changeset_graph.add_changeset(changeset)
      if isinstance(changeset, OrderedChangeset):
        ordered_changeset_map[changeset.ordinal] = changeset.id
        self.ordinals[changeset.id] = changeset.ordinal
      elif isinstance(changeset, BranchChangeset):
        branch_changeset_ids.append(changeset.id)
      max_changeset_id = max(max_changeset_id, changeset.id)

    # An array of ordered_changeset ids, indexed by ordinal:
    ordered_changesets = []
    for ordinal in range(len(ordered_changeset_map)):
      id = ordered_changeset_map[ordinal]
      ordered_changesets.append(id)

    ordered_changeset_ids = set(ordered_changeset_map.values())
    del ordered_changeset_map

    self.changeset_key_generator = KeyGenerator(max_changeset_id + 1)

    # First we scan through all BranchChangesets looking for
    # changesets that are individually "retrograde" and splitting
    # those up:
    for changeset_id in branch_changeset_ids:
      self._split_if_retrograde(changeset_id)

    del self.ordinals

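    # Index into ordered_changesets of the next OrderedChangeset that
    # still has to be consumed from the graph: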
    next_ordered_changeset = 0

    self.processed_changeset_logger = ProcessedChangesetLogger()

    while self.changeset_graph:
      # Consume any nodes that don't have predecessors:
      for (changeset, time_range) \
              in self.changeset_graph.consume_nopred_nodes():
        self.processed_changeset_logger.log(changeset.id)
        if changeset.id in ordered_changeset_ids:
          next_ordered_changeset += 1
          ordered_changeset_ids.remove(changeset.id)

      self.processed_changeset_logger.flush()

      if not self.changeset_graph:
        break

      # Now work on the next ordered changeset that has not yet been
      # processed. BreakSymbolChangesetCyclesPass has broken any
      # cycles involving only SymbolChangesets, so the presence of a
      # cycle implies that there is at least one ordered changeset
      # left in the graph:
      assert next_ordered_changeset < len(ordered_changesets)

      id = ordered_changesets[next_ordered_changeset]
      path = self.changeset_graph.search_for_path(id, ordered_changeset_ids)
      if path:
        if Log().is_on(Log.DEBUG):
          Log().debug('Breaking path from %s to %s' % (path[0], path[-1],))
        self.break_segment(path)
      else:
        # There were no ordered changesets among the reachable
        # predecessors, so do generic cycle-breaking:
        if Log().is_on(Log.DEBUG):
          Log().debug(
              'Breaking generic cycle found from %s'
              % (self.changeset_db[id],)
              )
        self.break_cycle(self.changeset_graph.find_cycle(id))

    del self.processed_changeset_logger
    self.changeset_graph.close()
    self.changeset_graph = None
    self.cvs_item_to_changeset_id = None
    self.changeset_db = None

    Log().quiet("Done")


class TopologicalSortPass(Pass):
  """Sort changesets into commit order."""

  def register_artifacts(self):
    self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)

  def get_source_changesets(self, changeset_db):
    for changeset_id in changeset_db.keys():
      yield changeset_db[changeset_id]

  def get_changesets(self):
    """Generate (changeset, timestamp) pairs in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    changeset_graph = ChangesetGraph(
        changeset_db,
        CVSItemToChangesetTable(
            artifact_manager.get_temp_file(
                config.CVS_ITEM_TO_CHANGESET_ALLBROKEN
                ),
            DB_OPEN_READ,
            ),
        )

    symbol_changeset_ids = set()

    for changeset in self.get_source_changesets(changeset_db):
      changeset_graph.add_changeset(changeset)
      if isinstance(changeset, SymbolChangeset):
        symbol_changeset_ids.add(changeset.id)

    # Ensure a monotonically-increasing timestamp series by keeping
    # track of the previous timestamp and ensuring that the following
    # one is larger.
    timestamper = Timestamper()

    for (changeset, time_range) in changeset_graph.consume_graph():
      timestamp = timestamper.get(
          time_range.t_max, changeset.id in symbol_changeset_ids
          )
      yield (changeset, timestamp)

    changeset_graph.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Generating CVSRevisions in commit order...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    sorted_changesets = open(
        artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE),
        'w')

    for (changeset, timestamp) in self.get_changesets():
      sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,))

    sorted_changesets.close()

    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")


class CreateRevsPass(Pass):
  """Generate the SVNCommit <-> CVSRevision mapping databases.

  SVNCommitCreator also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag. See SymbolingsLogger for more details.

  This pass was formerly known as pass5."""

  def register_artifacts(self):
    self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file(config.SVN_COMMITS_STORE)
    self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE)
    self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX)
    self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE)

  def get_changesets(self):
    """Generate (changeset, timestamp,) tuples in commit order."""

    changeset_db = ChangesetDatabase(
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE),
        artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX),
        DB_OPEN_READ)

    for line in file(
            artifact_manager.get_temp_file(
                config.CHANGESETS_SORTED_DATAFILE)):
      [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()]
      yield (changeset_db[changeset_id], timestamp)

    changeset_db.close()

  def get_svn_commits(self, creator):
    """Generate the SVNCommits, in order."""

    for (changeset, timestamp) in self.get_changesets():
      for svn_commit in creator.process_changeset(changeset, timestamp):
        yield svn_commit

  def log_svn_commit(self, svn_commit):
    """Output information about SVN_COMMIT."""

    Log().normal(
        'Creating Subversion r%d (%s)'
        % (svn_commit.revnum, svn_commit.get_description(),)
        )

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,))

  def run(self, run_options, stats_keeper):
    Log().quiet("Mapping CVS revisions to Subversion commits...")

    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()

    persistence_manager = PersistenceManager(DB_OPEN_NEW)

    creator = SVNCommitCreator()
    for svn_commit in self.get_svn_commits(creator):
      self.log_svn_commit(svn_commit)
      persistence_manager.put_svn_commit(svn_commit)

    stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id())
    del creator

    persistence_manager.close()
    Ctx()._symbolings_logger.close()
    Ctx()._cvs_items_db.close()
    Ctx()._symbol_db.close()
    Ctx()._cvs_file_db.close()

    Log().quiet("Done")


class SortSymbolsPass(Pass):
  """This pass was formerly known as pass6."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS)

  def run(self, run_options, stats_keeper):
    Log().quiet("Sorting symbolic name source revisions...")

    sort_file(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS),
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        options=['-k', '1,1', '-k', '2,2n', '-k', '3'],
        )
    Log().quiet("Done")


class IndexSymbolsPass(Pass):
  """This pass was formerly known as pass7."""

  def register_artifacts(self):
    self._register_temp_file(config.SYMBOL_OFFSETS_DB)
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED)

  def generate_offsets_for_symbolings(self):
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered. This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    offsets = {}

    f = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    old_id = None
    while True:
      fpos = f.tell()
      line = f.readline()
      if not line:
        break
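      # The first field of each line is the symbol id, written in hex;
      # remember the offset of the first line seen for each id: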
      id, svn_revnum, ignored = line.split(" ", 2)
      id = int(id, 16)
      if id != old_id:
        Log().verbose(' ', Ctx()._symbol_db.get_symbol(id).name)
        old_id = id
        offsets[id] = fpos

    f.close()

    offsets_db = file(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb')
    cPickle.dump(offsets, offsets_db, -1)
    offsets_db.close()

  def run(self, run_options, stats_keeper):
    Log().quiet("Determining offsets for all symbolic names...")
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._symbol_db = SymbolDatabase()
    self.generate_offsets_for_symbolings()
    Ctx()._symbol_db.close()
    Log().quiet("Done.")


class OutputPass(Pass):
  """This pass was formerly known as pass8."""

  def register_artifacts(self):
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.CVS_FILES_DB)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE)
    self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE)
    self._register_temp_file_needed(config.METADATA_CLEAN_STORE)
    self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE)
    self._register_temp_file_needed(config.SVN_COMMITS_STORE)
    self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS)
    Ctx().output_option.register_artifacts(self)

  def run(self, run_options, stats_keeper):
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    Ctx()._metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE),
        artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE),
        DB_OPEN_READ,
        )
    Ctx()._cvs_items_db = IndexedCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE),
        artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE),
        DB_OPEN_READ)
    Ctx()._symbol_db = SymbolDatabase()
    Ctx().persistence_manager = PersistenceManager(DB_OPEN_READ)

    Ctx().output_option.setup(stats_keeper.svn_rev_count())

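    # Read the SVNCommits back out of the PersistenceManager in
    # revision order and feed them to the output option, stopping as
    # soon as no commit exists for the next revision number: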
    svn_revnum = 1
    svn_commit = Ctx().persistence_manager.get_svn_commit(svn_revnum)
    while svn_commit:
      svn_commit.output(Ctx().output_option)
      svn_revnum += 1
      svn_commit = Ctx().persistence_manager.get_svn_commit(svn_revnum)

    Ctx().output_option.cleanup()
    Ctx().persistence_manager.close()

    Ctx()._symbol_db.close()
    Ctx()._cvs_items_db.close()
    Ctx()._metadata_db.close()
    Ctx()._cvs_file_db.close()


# The list of passes constituting a run of cvs2svn:
passes = [
    CollectRevsPass(),
    CleanMetadataPass(),
    CollateSymbolsPass(),
    #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE),
    FilterSymbolsPass(),
    SortRevisionSummaryPass(),
    SortSymbolSummaryPass(),
    InitializeChangesetsPass(),
    #CheckIndexedItemStoreDependenciesPass(
    #    config.CVS_ITEMS_SORTED_STORE,
    #    config.CVS_ITEMS_SORTED_INDEX_TABLE),
    BreakRevisionChangesetCyclesPass(),
    RevisionTopologicalSortPass(),
    BreakSymbolChangesetCyclesPass(),
    BreakAllChangesetCyclesPass(),
    TopologicalSortPass(),
    CreateRevsPass(),
    SortSymbolsPass(),
    IndexSymbolsPass(),
    OutputPass(),
    ]