3 # (Be in -*- python -*- mode.)
5 # ====================================================================
6 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
8 # This software is licensed as described in the file COPYING, which
9 # you should have received as part of this distribution. The terms
10 # are also available at http://subversion.tigris.org/license-1.html.
11 # If newer versions of this license are posted there, you may use a
12 # newer version instead, at your option.
14 # This software consists of voluntary contributions made by many
15 # individuals. For exact contribution history, see the revision
16 # history and logs, available at http://cvs2svn.tigris.org/.
17 # ====================================================================
19 """Shrink a test case as much as possible.
21 !!!!!!! WARNING !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
22 !! This script irretrievably destroys the CVS repository that it is !!
24 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
26 This script is meant to be used to shrink the size of a CVS repository
27 that is to be used as a test case for cvs2svn. It tries to throw out
28 parts of the repository while preserving the bug.
30 CVSREPO should be the path of a copy of a CVS archive. TEST_COMMAND
31 is a command that should run successfully (i.e., with exit code '0')
32 if the bug is still present, and fail if the bug is absent."""
39 from cStringIO
import StringIO
41 sys
.path
.insert(0, os
.path
.dirname(os
.path
.dirname(os
.path
.abspath(__file__
))))
43 from cvs2svn_lib
.key_generator
import KeyGenerator
45 from cvs2svn_lib
.rcsparser
import Sink
46 from cvs2svn_lib
.rcsparser
import parse
48 from contrib
.rcs_file_filter
import WriteRCSFileSink
49 from contrib
.rcs_file_filter
import FilterSink
52 usage
= 'USAGE: %prog [options] CVSREPO TEST_COMMAND'
54 Simplify a CVS repository while preserving the presence of a bug.
56 ***THE CVS REPOSITORY WILL BE DESTROYED***
58 CVSREPO is the path to a CVS repository.
60 TEST_COMMAND is a command that runs successfully (i.e., with exit
61 code '0') if the bug is still present, and fails if the bug is
68 tmpdir
= 'shrink_test_case-tmp'
70 file_key_generator
= KeyGenerator(1)
def get_tmp_filename():
    """Return a new path of the form 'fNNNNNNN.tmp' inside tmpdir.

    The number is taken from file_key_generator.gen_id(), so each call
    is expected to produce a different name (assuming gen_id() never
    repeats a value -- confirm against KeyGenerator)."""

    basename = 'f%07d.tmp' % file_key_generator.gen_id()
    return os.path.join(tmpdir, basename)
77 class CommandFailedException(Exception):
81 def command(cmd
, *args
):
83 sys
.stderr
.write('Running: %s %s...' % (cmd
, ' '.join(args
),))
84 retval
= os
.spawnlp(os
.P_WAIT
, cmd
, cmd
, *args
)
87 sys
.stderr
.write('failed (%s).\n' % retval
)
88 raise CommandFailedException(' '.join([cmd
] + list(args
)))
91 sys
.stderr
.write('succeeded.\n')
95 """A reversible modification that can be made to the repository."""
98 """Return the estimated size of this modification.
100 This should be approximately the number of bytes by which the
101 problem will be shrunk if this modification is successful. It
102 is used to choose the order to attempt the modifications."""
104 raise NotImplementedError()
107 """Modify the repository.
109 Store enough information that the change can be reverted."""
111 raise NotImplementedError()
114 """Revert this modification."""
116 raise NotImplementedError()
119 """Make this modification permanent."""
121 raise NotImplementedError()
123 def try_mod(self
, test_command
):
125 sys
.stdout
.write('Testing with the following modifications:\n')
126 self
.output(sys
.stdout
, ' ')
130 except CommandFailedException
:
133 'The bug disappeared. Reverting modifications.\n'
136 sys
.stdout
.write('Attempted modification unsuccessful.\n')
139 except KeyboardInterrupt:
140 sys
.stderr
.write('Interrupted. Reverting last modifications.\n')
145 'Unexpected exception. Reverting last modifications.\n'
152 sys
.stdout
.write('The bug remains. Keeping modifications.\n')
155 'The bug remains after the following modifications:\n'
157 self
.output(sys
.stdout
, ' ')
160 def get_submodifications(self
, success
):
161 """Return a generator or iterable of submodifications.
163 Return submodifications that should be tried after this
164 modification. SUCCESS specifies whether this modification was
def output(self, f, prefix=''):
    """Write a description of this modification to F.

    PREFIX is a string that implementations place at the start of each
    line they write.  This base version is abstract: it always raises
    NotImplementedError and must be overridden by subclasses."""

    raise NotImplementedError()
176 class EmptyModificationListException(Exception):
180 class SplitModification(Modification
):
181 """Holds two modifications split out of a failing modification.
183 Because the original modification failed, it is known that mod1+mod2
184 can't succeed. So if mod1 succeeds, mod2 need not be attempted
185 (though its submodifications are attempted)."""
187 def __init__(self
, mod1
, mod2
):
188 # Choose mod1 to be the larger modification:
189 if mod2
.get_size() > mod1
.get_size():
190 mod1
, mod2
= mod2
, mod1
196 return self
.mod1
.get_size()
207 def get_submodifications(self
, success
):
209 for mod
in self
.mod2
.get_submodifications(False):
214 for mod
in self
.mod1
.get_submodifications(success
):
def output(self, f, prefix=''):
    """Write a description of this modification to F.

    Only mod1 is reported: the call is forwarded to mod1.output() with
    the same PREFIX."""

    primary = self.mod1
    primary.output(f, prefix=prefix)
221 return 'SplitModification(%s, %s)' % (self
.mod1
, self
.mod2
,)
224 class CompoundModification(Modification
):
def __init__(self, modifications):
    """Initialize from MODIFICATIONS, a non-empty sequence of Modifications.

    The sequence is stored as given (not copied), and the sizes
    reported by the members' get_size() methods are added up into
    self.size.

    Raise EmptyModificationListException if MODIFICATIONS is empty."""

    if not modifications:
        raise EmptyModificationListException()

    self.modifications = modifications

    total = 0
    for member in self.modifications:
        total += member.get_size()
    self.size = total
235 for modification
in self
.modifications
:
236 modification
.modify()
239 for modification
in self
.modifications
:
240 modification
.revert()
243 for modification
in self
.modifications
:
244 modification
.commit()
246 def get_submodifications(self
, success
):
248 # All modifications were completed successfully; no need
251 elif len(self
.modifications
) == 1:
252 # Our modification list cannot be subdivided, but maybe
253 # the remaining modification can:
254 for mod
in self
.modifications
[0].get_submodifications(False):
257 # Create subsets of each half of the list and put them in
258 # a SplitModification:
259 n
= len(self
.modifications
) // 2
260 yield SplitModification(
261 create_modification(self
.modifications
[:n
]),
262 create_modification(self
.modifications
[n
:])
def output(self, f, prefix=''):
    """Write a description of each contained modification to F.

    The members of self.modifications are reported in order, each via
    its own output() method and with the same PREFIX."""

    for member in self.modifications:
        member.output(f, prefix=prefix)
270 return str(self
.modifications
)
273 def create_modification(mods
):
274 """Create and return a Modification based on the iterable MODS.
276 Raise EmptyModificationListException if mods is empty."""
282 return CompoundModification(mods
)
285 def compute_dir_size(path
):
286 # Add a little bit for the directory itself.
288 for filename
in os
.listdir(path
):
289 subpath
= os
.path
.join(path
, filename
)
290 if os
.path
.isdir(subpath
):
291 size
+= compute_dir_size(subpath
)
292 elif os
.path
.isfile(subpath
):
293 size
+= os
.path
.getsize(subpath
)
298 class DeleteDirectoryModification(Modification
):
299 def __init__(self
, path
):
301 self
.size
= compute_dir_size(self
.path
)
307 self
.tempfile
= get_tmp_filename()
308 shutil
.move(self
.path
, self
.tempfile
)
311 shutil
.move(self
.tempfile
, self
.path
)
315 shutil
.rmtree(self
.tempfile
)
318 def get_submodifications(self
, success
):
320 # The whole directory could be deleted; no need to recurse:
323 # Try deleting subdirectories:
325 DeleteDirectoryModification(subdir
)
326 for subdir
in get_dirs(self
.path
)
329 yield create_modification(mods
)
331 # Try deleting files:
333 DeleteFileModification(filename
)
334 for filename
in get_files(self
.path
)
337 yield create_modification(mods
)
def output(self, f, prefix=''):
    """Write a one-line report to F naming the deleted directory.

    The line starts with PREFIX and shows the repr of self.path."""

    report = '%sDeleted directory %r\n' % (prefix, self.path)
    f.write(report)
343 return 'DeleteDirectory(%r)' % self
.path
346 class DeleteFileModification(Modification
):
347 def __init__(self
, path
):
349 self
.size
= os
.path
.getsize(self
.path
)
355 self
.tempfile
= get_tmp_filename()
356 shutil
.move(self
.path
, self
.tempfile
)
359 shutil
.move(self
.tempfile
, self
.path
)
363 os
.remove(self
.tempfile
)
def output(self, f, prefix=''):
    """Write a one-line report to F naming the deleted file.

    The line starts with PREFIX and shows the repr of self.path."""

    report = '%sDeleted file %r\n' % (prefix, self.path)
    f.write(report)
370 return 'DeleteFile(%r)' % self
.path
373 def rev_tuple(revision
):
374 retval
= [int(s
) for s
in revision
.split('.') if int(s
)]
382 raise NotImplementedError()
def get_filter_sink(self, sink):
    """Return a parser sink that applies this filter in front of SINK.

    The returned object is what filter() hands to parse().  This base
    version is abstract: it always raises NotImplementedError and must
    be overridden by subclasses."""

    raise NotImplementedError()
387 def filter(self
, text
):
389 sink
= WriteRCSFileSink(fout
)
390 filter = self
.get_filter_sink(sink
)
391 parse(StringIO(text
), filter)
392 return fout
.getvalue()
394 def get_subfilters(self
):
def output(self, f, prefix=''):
    """Write a description of this filter to F.

    PREFIX is a string that implementations place at the start of each
    line they write.  This base version is abstract: it always raises
    NotImplementedError and must be overridden by subclasses."""

    raise NotImplementedError()
401 class DeleteTagRCSFileFilter(RCSFileFilter
):
class Sink(FilterSink):
    """A FilterSink that suppresses the definition of a single tag.

    Definitions of every other tag are passed on unchanged through
    FilterSink.define_tag()."""

    def __init__(self, sink, tagname):
        """Wrap SINK; TAGNAME is the name of the tag to drop."""

        FilterSink.__init__(self, sink)
        self.tagname = tagname

    def define_tag(self, name, revision):
        """Forward the tag NAME -> REVISION unless NAME is the dropped tag."""

        if name == self.tagname:
            # This is the tag being deleted; do not pass it on.
            return
        FilterSink.define_tag(self, name, revision)
411 def __init__(self
, tagname
):
412 self
.tagname
= tagname
def get_filter_sink(self, sink):
    """Return a self.Sink wrapping SINK that drops the tag self.tagname."""

    sink_factory = self.Sink
    return sink_factory(sink, self.tagname)
def output(self, f, prefix=''):
    """Write a one-line report to F naming the deleted tag.

    The line starts with PREFIX and shows the repr of self.tagname."""

    report = '%sDeleted tag %r\n' % (prefix, self.tagname)
    f.write(report)
424 def get_tag_set(path
):
425 class TagCollector(Sink
):
429 # A map { branch_tuple : name } for branches on which no
430 # revisions have yet been seen:
433 def define_tag(self
, name
, revision
):
434 revtuple
= rev_tuple(revision
)
435 if len(revtuple
) % 2 == 0:
436 # This is a tag (as opposed to branch)
439 self
.branches
[revtuple
] = name
442 self
, revision
, timestamp
, author
, state
, branches
, next
444 branch
= rev_tuple(revision
)[:-1]
446 del self
.branches
[branch
]
452 for branch
in self
.branches
.values():
456 tag_collector
= TagCollector()
459 parse(f
, tag_collector
)
462 return tag_collector
.get_tags()
465 class DeleteBranchTreeRCSFileFilter(RCSFileFilter
):
466 class Sink(FilterSink
):
467 def __init__(self
, sink
, branch_rev
):
468 FilterSink
.__init
__(self
, sink
)
469 self
.branch_rev
= branch_rev
def is_on_branch(self, revision):
    """Return True iff REVISION falls under the branch self.branch_rev.

    REVISION is converted with rev_tuple(); it matches when its leading
    components equal self.branch_rev."""

    candidate = rev_tuple(revision)
    depth = len(self.branch_rev)
    return candidate[:depth] == self.branch_rev
def define_tag(self, name, revision):
    """Forward the tag NAME -> REVISION unless REVISION is on the branch.

    Tags for which is_on_branch(REVISION) is true are dropped."""

    if self.is_on_branch(revision):
        # The tag points into the branch being deleted; drop it.
        return
    FilterSink.define_tag(self, name, revision)
480 self
, revision
, timestamp
, author
, state
, branches
, next
482 if not self
.is_on_branch(revision
):
485 for branch
in branches
486 if not self
.is_on_branch(branch
)
488 FilterSink
.define_revision(
489 self
, revision
, timestamp
, author
, state
, branches
, next
def set_revision_info(self, revision, log, text):
    """Forward LOG and TEXT for REVISION unless it is on the branch.

    Revisions for which is_on_branch(REVISION) is true are dropped."""

    if self.is_on_branch(revision):
        # The revision belongs to the branch being deleted; drop it.
        return
    FilterSink.set_revision_info(self, revision, log, text)
496 def __init__(self
, branch_rev
, subbranch_tree
):
497 self
.branch_rev
= branch_rev
498 self
.subbranch_tree
= subbranch_tree
def get_filter_sink(self, sink):
    """Return a self.Sink wrapping SINK, built with self.branch_rev."""

    sink_factory = self.Sink
    return sink_factory(sink, self.branch_rev)
def get_subfilters(self):
    """Generate one filter per entry of self.subbranch_tree.

    Each entry is a (branch_rev, subbranch_tree) pair; a new
    DeleteBranchTreeRCSFileFilter is yielded for each, in order."""

    for entry in self.subbranch_tree:
        (child_rev, child_tree) = entry
        yield DeleteBranchTreeRCSFileFilter(child_rev, child_tree)
510 def output(self
, f
, prefix
=''):
512 '%sDeleted branch %s\n'
513 % (prefix
, '.'.join([str(s
) for s
in self
.branch_rev
]),)
517 def get_branch_tree(path
):
518 """Return the forest of branches in path.
520 Return [(branch_revision, [sub_branch, ...]), ...], where
521 branch_revision is a revtuple and sub_branch has the same form as
522 the whole return value.
526 class BranchCollector(Sink
):
531 self
, revision
, timestamp
, author
, state
, branches
, next
533 parent
= rev_tuple(revision
)[:-1]
536 entry
= self
.branches
.setdefault(parent
, [])
537 for branch
in branches
:
538 entry
.append(rev_tuple(branch
)[:-1])
540 def _get_subbranches(self
, parent
):
543 branches
= self
.branches
[parent
]
546 del self
.branches
[parent
]
547 for branch
in branches
:
548 subbranches
= self
._get
_subbranches
(branch
)
549 retval
.append((branch
, subbranches
,))
552 def get_branches(self
):
553 retval
= self
._get
_subbranches
((1,))
554 assert not self
.branches
557 branch_collector
= BranchCollector()
560 parse(f
, branch_collector
)
563 return branch_collector
.get_branches()
566 class RCSFileModification(Modification
):
567 """A Modification that involves changing the contents of an RCS file."""
569 def __init__(self
, path
, filters
):
571 self
.filters
= filters
[:]
573 for filter in self
.filters
:
574 self
.size
+= filter.get_size()
580 self
.tempfile
= get_tmp_filename()
581 shutil
.move(self
.path
, self
.tempfile
)
583 f
= open(self
.tempfile
, 'rb')
589 for filter in self
.filters
:
590 text
= filter.filter(text
)
592 f
= open(self
.path
, 'wb')
599 shutil
.move(self
.tempfile
, self
.path
)
603 os
.remove(self
.tempfile
)
606 def get_submodifications(self
, success
):
608 # All filters completed successfully; no need to try
611 elif len(self
.filters
) == 1:
612 # The last filter failed; see if it has any subfilters:
613 subfilters
= list(self
.filters
[0].get_subfilters())
615 yield RCSFileModification(self
.path
, subfilters
)
617 n
= len(self
.filters
) // 2
618 yield SplitModification(
619 RCSFileModification(self
.path
, self
.filters
[:n
]),
620 RCSFileModification(self
.path
, self
.filters
[n
:])
623 def output(self
, f
, prefix
=''):
624 f
.write('%sModified file %r\n' % (prefix
, self
.path
,))
625 for filter in self
.filters
:
626 filter.output(f
, prefix
=(prefix
+ ' '))
629 return 'RCSFileModification(%r)' % (self
.filters
,)
632 def try_modification_combinations(test_command
, mods
):
633 """Try MODS and their submodifications.
635 Return True if any modifications were successful."""
637 # A list of lists of modifications that should still be tried:
641 todo
.sort(key
=lambda mod
: mod
.get_size())
643 success
= mod
.try_mod(test_command
)
644 # Now add possible submodifications to the list of things to try:
645 todo
.extend(mod
.get_submodifications(success
))
649 filenames
= os
.listdir(path
)
651 for filename
in filenames
:
652 subpath
= os
.path
.join(path
, filename
)
653 if os
.path
.isdir(subpath
):
657 def get_files(path
, recurse
=False):
658 filenames
= os
.listdir(path
)
660 for filename
in filenames
:
661 subpath
= os
.path
.join(path
, filename
)
662 if os
.path
.isfile(subpath
):
664 elif recurse
and os
.path
.isdir(subpath
):
665 for x
in get_files(subpath
, recurse
=recurse
):
669 def shrink_repository(test_command
, cvsrepo
):
670 try_modification_combinations(
671 test_command
, [DeleteDirectoryModification(cvsrepo
)]
674 # Try deleting branches:
676 for path
in get_files(cvsrepo
, recurse
=True):
677 branch_tree
= get_branch_tree(path
)
680 for (branch_revision
, subbranch_tree
) in branch_tree
:
682 DeleteBranchTreeRCSFileFilter(
683 branch_revision
, subbranch_tree
686 mods
.append(RCSFileModification(path
, filters
))
688 try_modification_combinations(test_command
, mods
)
692 for path
in get_files(cvsrepo
, recurse
=True):
693 tags
= list(get_tag_set(path
))
696 filters
= [DeleteTagRCSFileFilter(tag
) for tag
in tags
]
697 mods
.append(RCSFileModification(path
, filters
))
700 try_modification_combinations(test_command
, mods
)
703 first_fail_message
= """\
704 ERROR! The test command failed with the original repository. The
705 test command should be designed so that it succeeds (indicating that
706 the bug is still present) with the original repository, and fails only
707 after the bug disappears. Please fix your test command and start
712 class MyHelpFormatter(optparse
.IndentedHelpFormatter
):
713 """A HelpFormatter for optparse that doesn't reformat the description."""
715 def format_description(self
, description
):
720 parser
= optparse
.OptionParser(
721 usage
=usage
, description
=description
,
722 formatter
=MyHelpFormatter(),
724 parser
.set_defaults(skip_initial_test
=False)
726 '--skip-initial-test',
727 action
='store_true', default
=False,
728 help='skip verifying that the bug exists in the original repository',
731 (options
, args
) = parser
.parse_args()
738 if not os
.path
.isdir(tmpdir
):
741 if not options
.skip_initial_test
:
742 sys
.stdout
.write('Testing with the original repository.\n')
745 except CommandFailedException
, e
:
746 sys
.stderr
.write(first_fail_message
)
749 'The bug is confirmed to exist in the initial repository.\n'
754 shrink_repository(test_command
, cvsrepo
)
755 except KeyboardInterrupt:
761 sys
.stderr
.write('ERROR: %s (ignored)\n' % (e
,))
764 if __name__
== '__main__':