3 # (Be in -*- python -*- mode.)
5 # ====================================================================
6 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
8 # This software is licensed as described in the file COPYING, which
9 # you should have received as part of this distribution. The terms
10 # are also available at http://subversion.tigris.org/license-1.html.
11 # If newer versions of this license are posted there, you may use a
12 # newer version instead, at your option.
14 # This software consists of voluntary contributions made by many
15 # individuals. For exact contribution history, see the revision
16 # history and logs, available at http://cvs2svn.tigris.org/.
17 # ====================================================================
19 """Shrink a test case as much as possible.
21 !!!!!!! WARNING !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
22 !! This script irretrievably destroys the CVS repository that it is !!
24 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
26 This script is meant to be used to shrink the size of a CVS repository
27 that is to be used as a test case for cvs2svn. It tries to throw out
28 parts of the repository while preserving the bug.
30 CVSREPO should be the path of a copy of a CVS archive. TEST_COMMAND
31 is a command that should run successfully (i.e., with exit code '0')
32 if the bug is still present, and fail if the bug is absent."""
39 from cStringIO
import StringIO
41 sys
.path
.insert(0, os
.path
.dirname(os
.path
.dirname(os
.path
.abspath(__file__
))))
43 from cvs2svn_lib
.key_generator
import KeyGenerator
45 import cvs2svn_rcsparse
48 from contrib
.rcs_file_filter
import WriteRCSFileSink
49 from contrib
.rcs_file_filter
import FilterSink
52 usage
= 'USAGE: %prog [options] CVSREPO TEST_COMMAND'
54 Simplify a CVS repository while preserving the presence of a bug.
56 ***THE CVS REPOSITORY WILL BE DESTROYED***
58 CVSREPO is the path to a CVS repository.
60 TEST_COMMAND is a command that runs successfully (i.e., with exit
61 code '0') if the bug is still present, and fails if the bug is
68 tmpdir
= 'shrink_test_case-tmp'
70 file_key_generator
= KeyGenerator(1)
def get_tmp_filename():
    """Return the path of a new temporary file name under tmpdir.

    The basename is 'f%07d.tmp' filled in with the next id obtained
    from file_key_generator."""

    basename = 'f%07d.tmp' % file_key_generator.gen_id()
    return os.path.join(tmpdir, basename)
77 class CommandFailedException(Exception):
81 def command(cmd
, *args
):
83 sys
.stderr
.write('Running: %s %s...' % (cmd
, ' '.join(args
),))
84 retval
= os
.spawnlp(os
.P_WAIT
, cmd
, cmd
, *args
)
87 sys
.stderr
.write('failed (%s).\n' % retval
)
88 raise CommandFailedException(' '.join([cmd
] + list(args
)))
91 sys
.stderr
.write('succeeded.\n')
95 """A reversible modification that can be made to the repository."""
98 """Return the estimated size of this modification.
100 This should be approximately the number of bytes by which the
101 problem will be shrunk if this modification is successful. It
102 is used to choose the order to attempt the modifications."""
104 raise NotImplementedError()
107 """Modify the repository.
109 Store enough information that the change can be reverted."""
111 raise NotImplementedError()
114 """Revert this modification."""
116 raise NotImplementedError()
119 """Make this modification permanent."""
121 raise NotImplementedError()
123 def try_mod(self
, test_command
):
125 sys
.stdout
.write('Testing with the following modifications:\n')
126 self
.output(sys
.stdout
, ' ')
130 except CommandFailedException
:
133 'The bug disappeared. Reverting modifications.\n'
136 sys
.stdout
.write('Attempted modification unsuccessful.\n')
139 except KeyboardInterrupt:
140 sys
.stderr
.write('Interrupted. Reverting last modifications.\n')
145 'Unexpected exception. Reverting last modifications.\n'
152 sys
.stdout
.write('The bug remains. Keeping modifications.\n')
155 'The bug remains after the following modifications:\n'
157 self
.output(sys
.stdout
, ' ')
160 def get_submodifications(self
, success
):
161 """Return a generator or iterable of submodifications.
163 Return submodifications that should be tried after this
164 modification. SUCCESS specifies whether this modification was
def output(self, f, prefix=''):
    """Write a description of this modification to F.

    PREFIX is the string that implementations put at the start of
    the text they write.  This base version is a placeholder that
    always raises NotImplementedError."""

    raise NotImplementedError()
176 class EmptyModificationListException(Exception):
180 class SplitModification(Modification
):
181 """Holds two modifications split out of a failing modification.
183 Because the original modification failed, it is known that mod1+mod2
184 can't succeed. So if mod1 succeeds, mod2 need not be attempted
185 (though its submodifications are attempted)."""
187 def __init__(self
, mod1
, mod2
):
188 # Choose mod1 to be the larger modification:
189 if mod2
.get_size() > mod1
.get_size():
190 mod1
, mod2
= mod2
, mod1
196 return self
.mod1
.get_size()
207 def get_submodifications(self
, success
):
209 for mod
in self
.mod2
.get_submodifications(False):
214 for mod
in self
.mod1
.get_submodifications(success
):
def output(self, f, prefix=''):
    """Describe this modification on F by delegating to self.mod1.

    PREFIX is handed through unchanged to mod1's output()."""

    first = self.mod1
    first.output(f, prefix=prefix)
221 return 'SplitModification(%s, %s)' % (self
.mod1
, self
.mod2
,)
224 class CompoundModification(Modification
):
def __init__(self, modifications):
    """Bundle MODIFICATIONS into a single compound modification.

    MODIFICATIONS is an iterable of objects providing get_size().  It
    is copied into a list, so one-shot iterables (e.g. generators) are
    accepted and later changes to the caller's list do not affect this
    object.

    Raise EmptyModificationListException if MODIFICATIONS is empty."""

    # Materialize before testing for emptiness: a generator object is
    # always truthy, so "not modifications" would never fire for it and
    # the sum() below would then exhaust it, leaving nothing to apply.
    modifications = list(modifications)
    if not modifications:
        raise EmptyModificationListException()
    self.modifications = modifications
    # Total estimated size is the sum of the parts' estimates.
    self.size = sum(mod.get_size() for mod in modifications)
235 for modification
in self
.modifications
:
236 modification
.modify()
239 for modification
in self
.modifications
:
240 modification
.revert()
243 for modification
in self
.modifications
:
244 modification
.commit()
246 def get_submodifications(self
, success
):
248 # All modifications were completed successfully; no need
251 elif len(self
.modifications
) == 1:
252 # Our modification list cannot be subdivided, but maybe
253 # the remaining modification can:
254 for mod
in self
.modifications
[0].get_submodifications(False):
257 # Create subsets of each half of the list and put them in
258 # a SplitModification:
259 n
= len(self
.modifications
) // 2
260 yield SplitModification(
261 create_modification(self
.modifications
[:n
]),
262 create_modification(self
.modifications
[n
:])
def output(self, f, prefix=''):
    """Write the description of each contained modification to F.

    The modifications are reported in list order, each with the same
    PREFIX."""

    for mod in self.modifications:
        mod.output(f, prefix=prefix)
270 return str(self
.modifications
)
273 def create_modification(mods
):
274 """Create and return a Modification based on the iterable MODS.
276 Raise EmptyModificationListException if mods is empty."""
282 return CompoundModification(mods
)
285 def compute_dir_size(path
):
286 # Add a little bit for the directory itself.
288 for filename
in os
.listdir(path
):
289 subpath
= os
.path
.join(path
, filename
)
290 if os
.path
.isdir(subpath
):
291 size
+= compute_dir_size(subpath
)
292 elif os
.path
.isfile(subpath
):
293 size
+= os
.path
.getsize(subpath
)
298 class DeleteDirectoryModification(Modification
):
299 def __init__(self
, path
):
301 self
.size
= compute_dir_size(self
.path
)
307 self
.tempfile
= get_tmp_filename()
308 shutil
.move(self
.path
, self
.tempfile
)
311 shutil
.move(self
.tempfile
, self
.path
)
315 shutil
.rmtree(self
.tempfile
)
318 def get_submodifications(self
, success
):
320 # The whole directory could be deleted; no need to recurse:
323 # Try deleting subdirectories:
325 DeleteDirectoryModification(subdir
)
326 for subdir
in get_dirs(self
.path
)
329 yield create_modification(mods
)
331 # Try deleting files:
333 DeleteFileModification(filename
)
334 for filename
in get_files(self
.path
)
337 yield create_modification(mods
)
def output(self, f, prefix=''):
    """Write a one-line 'Deleted directory' message for self.path to F.

    The line starts with PREFIX and shows the repr of the path."""

    message = '%sDeleted directory %r\n' % (prefix, self.path)
    f.write(message)
343 return 'DeleteDirectory(%r)' % self
.path
346 class DeleteFileModification(Modification
):
347 def __init__(self
, path
):
349 self
.size
= os
.path
.getsize(self
.path
)
355 self
.tempfile
= get_tmp_filename()
356 shutil
.move(self
.path
, self
.tempfile
)
359 shutil
.move(self
.tempfile
, self
.path
)
363 os
.remove(self
.tempfile
)
def output(self, f, prefix=''):
    """Write a one-line 'Deleted file' message for self.path to F.

    The line starts with PREFIX and shows the repr of the path."""

    message = '%sDeleted file %r\n' % (prefix, self.path)
    f.write(message)
370 return 'DeleteFile(%r)' % self
.path
373 def rev_tuple(revision
):
374 retval
= [int(s
) for s
in revision
.split('.') if int(s
)]
382 raise NotImplementedError()
def get_filter_sink(self, sink):
    """Return a sink that applies this filter in front of SINK.

    The filter() method of this class passes the returned object to
    cvs2svn_rcsparse.parse().  This base version is a placeholder
    that always raises NotImplementedError."""

    raise NotImplementedError()
387 def filter(self
, text
):
389 sink
= WriteRCSFileSink(fout
)
390 filter = self
.get_filter_sink(sink
)
391 cvs2svn_rcsparse
.parse(StringIO(text
), filter)
392 return fout
.getvalue()
394 def get_subfilters(self
):
def output(self, f, prefix=''):
    """Write a description of this filter to F.

    PREFIX is the string that implementations put at the start of
    the text they write.  This base version is a placeholder that
    always raises NotImplementedError."""

    raise NotImplementedError()
401 class DeleteTagRCSFileFilter(RCSFileFilter
):
402 class Sink(FilterSink
):
def __init__(self, sink, tagname):
    """Wrap SINK and remember TAGNAME, the tag this sink leaves out."""

    # Let the FilterSink base class set itself up around SINK first.
    FilterSink.__init__(self, sink)
    self.tagname = tagname
def define_tag(self, name, revision):
    """Forward the tag definition unless NAME is the tag being deleted."""

    if name == self.tagname:
        # This is the tag to drop: do not pass it downstream.
        return
    FilterSink.define_tag(self, name, revision)
def __init__(self, tagname):
    """Create a filter that deletes the tag named TAGNAME."""

    self.tagname = tagname
def get_filter_sink(self, sink):
    """Return a Sink wrapping SINK that omits the tag self.tagname."""

    sink_class = self.Sink
    return sink_class(sink, self.tagname)
def output(self, f, prefix=''):
    """Write a one-line 'Deleted tag' message for self.tagname to F.

    The line starts with PREFIX and shows the repr of the tag name."""

    message = '%sDeleted tag %r\n' % (prefix, self.tagname)
    f.write(message)
424 def get_tag_set(path
):
425 class TagCollector(cvs2svn_rcsparse
.Sink
):
429 # A map { branch_tuple : name } for branches on which no
430 # revisions have yet been seen:
433 def define_tag(self
, name
, revision
):
434 revtuple
= rev_tuple(revision
)
435 if len(revtuple
) % 2 == 0:
436 # This is a tag (as opposed to branch)
439 self
.branches
[revtuple
] = name
442 self
, revision
, timestamp
, author
, state
, branches
, next
444 branch
= rev_tuple(revision
)[:-1]
446 del self
.branches
[branch
]
452 for branch
in self
.branches
.values():
456 tag_collector
= TagCollector()
457 cvs2svn_rcsparse
.parse(open(path
, 'rb'), tag_collector
)
458 return tag_collector
.get_tags()
461 class DeleteBranchTreeRCSFileFilter(RCSFileFilter
):
462 class Sink(FilterSink
):
def __init__(self, sink, branch_rev):
    """Wrap SINK and remember BRANCH_REV, the branch to leave out.

    BRANCH_REV is compared against the leading elements of
    rev_tuple() results by is_on_branch()."""

    # Let the FilterSink base class set itself up around SINK first.
    FilterSink.__init__(self, sink)
    self.branch_rev = branch_rev
def is_on_branch(self, revision):
    """Return True iff REVISION lies on or below the deleted branch.

    That is the case when the leading elements of rev_tuple(REVISION)
    are equal to self.branch_rev."""

    branch = self.branch_rev
    leading = rev_tuple(revision)[:len(branch)]
    return leading == branch
def define_tag(self, name, revision):
    """Forward the tag definition unless REVISION is on the deleted branch."""

    if self.is_on_branch(revision):
        # The tag points into the branch being removed: drop it.
        return
    FilterSink.define_tag(self, name, revision)
476 self
, revision
, timestamp
, author
, state
, branches
, next
478 if not self
.is_on_branch(revision
):
481 for branch
in branches
482 if not self
.is_on_branch(branch
)
484 FilterSink
.define_revision(
485 self
, revision
, timestamp
, author
, state
, branches
, next
def set_revision_info(self, revision, log, text):
    """Forward LOG and TEXT unless REVISION is on the deleted branch."""

    if self.is_on_branch(revision):
        # The revision belongs to the branch being removed: drop it.
        return
    FilterSink.set_revision_info(self, revision, log, text)
def __init__(self, branch_rev, subbranch_tree):
    """Create a filter that deletes the branch BRANCH_REV.

    SUBBRANCH_TREE is an iterable of (branch_rev, subbranch_tree)
    pairs; get_subfilters() builds one filter from each pair."""

    self.branch_rev = branch_rev
    self.subbranch_tree = subbranch_tree
def get_filter_sink(self, sink):
    """Return a Sink wrapping SINK that omits the branch self.branch_rev."""

    sink_class = self.Sink
    return sink_class(sink, self.branch_rev)
def get_subfilters(self):
    """Generate one filter for each entry of self.subbranch_tree.

    Each entry is a (branch_rev, subbranch_tree) pair and becomes a
    DeleteBranchTreeRCSFileFilter of its own."""

    for entry in self.subbranch_tree:
        (branch_rev, subbranch_tree) = entry
        yield DeleteBranchTreeRCSFileFilter(branch_rev, subbranch_tree)
506 def output(self
, f
, prefix
=''):
508 '%sDeleted branch %s\n'
509 % (prefix
, '.'.join([str(s
) for s
in self
.branch_rev
]),)
513 def get_branch_tree(path
):
514 """Return the forest of branches in path.
516 Return [(branch_revision, [sub_branch, ...]), ...], where
517 branch_revision is a revtuple and sub_branch has the same form as
518 the whole return value.
522 class BranchCollector(cvs2svn_rcsparse
.Sink
):
527 self
, revision
, timestamp
, author
, state
, branches
, next
529 parent
= rev_tuple(revision
)[:-1]
532 entry
= self
.branches
.setdefault(parent
, [])
533 for branch
in branches
:
534 entry
.append(rev_tuple(branch
)[:-1])
536 def _get_subbranches(self
, parent
):
539 branches
= self
.branches
[parent
]
542 del self
.branches
[parent
]
543 for branch
in branches
:
544 subbranches
= self
._get
_subbranches
(branch
)
545 retval
.append((branch
, subbranches
,))
548 def get_branches(self
):
549 retval
= self
._get
_subbranches
((1,))
550 assert not self
.branches
553 branch_collector
= BranchCollector()
554 cvs2svn_rcsparse
.parse(open(path
, 'rb'), branch_collector
)
555 return branch_collector
.get_branches()
558 class RCSFileModification(Modification
):
559 """A Modification that involves changing the contents of an RCS file."""
561 def __init__(self
, path
, filters
):
563 self
.filters
= filters
[:]
565 for filter in self
.filters
:
566 self
.size
+= filter.get_size()
572 self
.tempfile
= get_tmp_filename()
573 shutil
.move(self
.path
, self
.tempfile
)
574 text
= open(self
.tempfile
, 'rb').read()
575 for filter in self
.filters
:
576 text
= filter.filter(text
)
577 open(self
.path
, 'wb').write(text
)
580 shutil
.move(self
.tempfile
, self
.path
)
584 os
.remove(self
.tempfile
)
587 def get_submodifications(self
, success
):
589 # All filters completed successfully; no need to try
592 elif len(self
.filters
) == 1:
593 # The last filter failed; see if it has any subfilters:
594 subfilters
= list(self
.filters
[0].get_subfilters())
596 yield RCSFileModification(self
.path
, subfilters
)
598 n
= len(self
.filters
) // 2
599 yield SplitModification(
600 RCSFileModification(self
.path
, self
.filters
[:n
]),
601 RCSFileModification(self
.path
, self
.filters
[n
:])
def output(self, f, prefix=''):
    """Write a description of this modification to F.

    First a 'Modified file' line for self.path, starting with PREFIX,
    then the description of each filter, written with PREFIX extended
    by one space."""

    header = '%sModified file %r\n' % (prefix, self.path)
    f.write(header)
    for rcs_filter in self.filters:
        rcs_filter.output(f, prefix=(prefix + ' '))
610 return 'RCSFileModification(%r)' % (self
.filters
,)
613 def try_modification_combinations(test_command
, mods
):
614 """Try MODS and their submodifications.
616 Return True if any modifications were successful."""
618 # A list of lists of modifications that should still be tried:
622 todo
.sort(key
=lambda mod
: mod
.get_size())
624 success
= mod
.try_mod(test_command
)
625 # Now add possible submodifications to the list of things to try:
626 todo
.extend(mod
.get_submodifications(success
))
630 filenames
= os
.listdir(path
)
632 for filename
in filenames
:
633 subpath
= os
.path
.join(path
, filename
)
634 if os
.path
.isdir(subpath
):
638 def get_files(path
, recurse
=False):
639 filenames
= os
.listdir(path
)
641 for filename
in filenames
:
642 subpath
= os
.path
.join(path
, filename
)
643 if os
.path
.isfile(subpath
):
645 elif recurse
and os
.path
.isdir(subpath
):
646 for x
in get_files(subpath
, recurse
=recurse
):
650 def shrink_repository(test_command
, cvsrepo
):
651 try_modification_combinations(
652 test_command
, [DeleteDirectoryModification(cvsrepo
)]
655 # Try deleting branches:
657 for path
in get_files(cvsrepo
, recurse
=True):
658 branch_tree
= get_branch_tree(path
)
661 for (branch_revision
, subbranch_tree
) in branch_tree
:
663 DeleteBranchTreeRCSFileFilter(
664 branch_revision
, subbranch_tree
667 mods
.append(RCSFileModification(path
, filters
))
669 try_modification_combinations(test_command
, mods
)
673 for path
in get_files(cvsrepo
, recurse
=True):
674 tags
= list(get_tag_set(path
))
677 filters
= [DeleteTagRCSFileFilter(tag
) for tag
in tags
]
678 mods
.append(RCSFileModification(path
, filters
))
681 try_modification_combinations(test_command
, mods
)
684 first_fail_message
= """\
685 ERROR! The test command failed with the original repository. The
686 test command should be designed so that it succeeds (indicating that
687 the bug is still present) with the original repository, and fails only
688 after the bug disappears. Please fix your test command and start
693 class MyHelpFormatter(optparse
.IndentedHelpFormatter
):
694 """A HelpFormatter for optparse that doesn't reformat the description."""
696 def format_description(self
, description
):
701 parser
= optparse
.OptionParser(
702 usage
=usage
, description
=description
,
703 formatter
=MyHelpFormatter(),
705 parser
.set_defaults(skip_initial_test
=False)
707 '--skip-initial-test',
708 action
='store_true', default
=False,
709 help='skip verifying that the bug exists in the original repository',
712 (options
, args
) = parser
.parse_args()
719 if not os
.path
.isdir(tmpdir
):
722 if not options
.skip_initial_test
:
723 sys
.stdout
.write('Testing with the original repository.\n')
726 except CommandFailedException
, e
:
727 sys
.stderr
.write(first_fail_message
)
730 'The bug is confirmed to exist in the initial repository.\n'
735 shrink_repository(test_command
, cvsrepo
)
736 except KeyboardInterrupt:
742 sys
.stderr
.write('ERROR: %s (ignored)\n' % (e
,))
745 if __name__
== '__main__':