Default the repository path to the current directory for all variants.
[cvs2svn.git] / contrib / shrink_test_case.py
blob03255c61ad28430fe5cc6cc0ce8e58f001e5f045
1 #! /usr/bin/python
3 # (Be in -*- python -*- mode.)
5 # ====================================================================
6 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
8 # This software is licensed as described in the file COPYING, which
9 # you should have received as part of this distribution. The terms
10 # are also available at http://subversion.tigris.org/license-1.html.
11 # If newer versions of this license are posted there, you may use a
12 # newer version instead, at your option.
14 # This software consists of voluntary contributions made by many
15 # individuals. For exact contribution history, see the revision
16 # history and logs, available at http://cvs2svn.tigris.org/.
17 # ====================================================================
19 """Shrink a test case as much as possible.
21 !!!!!!! WARNING !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
22 !! This script irretrievably destroys the CVS repository that it is !!
23 !! applied to! !!
24 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
26 This script is meant to be used to shrink the size of a CVS repository
27 that is to be used as a test case for cvs2svn. It tries to throw out
28 parts of the repository while preserving the bug.
30 CVSREPO should be the path of a copy of a CVS archive. TEST_COMMAND
31 is a command that should run successfully (i.e., with exit code '0')
32 if the bug is still present, and fail if the bug is absent."""
35 import sys
36 import os
37 import shutil
38 import optparse
39 from cStringIO import StringIO
41 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
43 from cvs2svn_lib.key_generator import KeyGenerator
45 from cvs2svn_lib.rcsparser import Sink
46 from cvs2svn_lib.rcsparser import parse
48 from contrib.rcs_file_filter import WriteRCSFileSink
49 from contrib.rcs_file_filter import FilterSink
52 usage = 'USAGE: %prog [options] CVSREPO TEST_COMMAND'
53 description = """\
54 Simplify a CVS repository while preserving the presence of a bug.
56 ***THE CVS REPOSITORY WILL BE DESTROYED***
58 CVSREPO is the path to a CVS repository.
60 TEST_COMMAND is a command that runs successfully (i.e., with exit
61 code '0') if the bug is still present, and fails if the bug is
62 absent.
63 """
66 verbose = 1
68 tmpdir = 'shrink_test_case-tmp'
70 file_key_generator = KeyGenerator(1)
def get_tmp_filename():
    """Return a not-yet-used path inside the scratch directory TMPDIR.

    The basename is 'f', a zero-padded id drawn from
    file_key_generator, and the suffix '.tmp'."""

    basename = 'f%07d.tmp' % file_key_generator.gen_id()
    return os.path.join(tmpdir, basename)
class CommandFailedException(Exception):
    """Raised by command() when the spawned program returns nonzero."""
def command(cmd, *args):
    """Run the program CMD with arguments ARGS and wait for it to finish.

    CMD is looked up via os.spawnlp().  Raise CommandFailedException,
    carrying the space-joined command line, if the return value of
    os.spawnlp() is nonzero.  Progress is reported on stderr only when
    the module-level VERBOSE is at least 2."""

    chatty = verbose >= 2
    if chatty:
        sys.stderr.write('Running: %s %s...' % (cmd, ' '.join(args),))

    status = os.spawnlp(os.P_WAIT, cmd, cmd, *args)

    if not status:
        if chatty:
            sys.stderr.write('succeeded.\n')
        return

    if chatty:
        sys.stderr.write('failed (%s).\n' % status)
    raise CommandFailedException(' '.join([cmd] + list(args)))
class Modification:
    """A reversible modification that can be made to the repository.

    Subclasses supply get_size(), modify(), revert(), commit() and
    output(); try_mod() drives them against a test command."""

    def get_size(self):
        """Return the estimated size of this modification.

        The value should approximate how many bytes the problem
        shrinks by if the modification succeeds.  It is used to decide
        the order in which modifications are attempted."""

        raise NotImplementedError()

    def modify(self):
        """Apply the modification to the repository.

        Enough information must be kept to allow revert()."""

        raise NotImplementedError()

    def revert(self):
        """Undo a modification previously applied by modify()."""

        raise NotImplementedError()

    def commit(self):
        """Make a modification previously applied by modify() permanent."""

        raise NotImplementedError()

    def try_mod(self, test_command):
        """Apply this modification and keep it only if the bug survives.

        TEST_COMMAND is called with no arguments after modify().  If
        it raises CommandFailedException the modification is reverted
        and False is returned.  Any other exception (including
        KeyboardInterrupt) also triggers a revert and is re-raised.
        Otherwise the modification is committed and True is
        returned."""

        chatty = verbose >= 1
        if chatty:
            sys.stdout.write('Testing with the following modifications:\n')
            self.output(sys.stdout, ' ')

        self.modify()
        try:
            test_command()
        except CommandFailedException:
            if chatty:
                message = 'The bug disappeared. Reverting modifications.\n'
            else:
                message = 'Attempted modification unsuccessful.\n'
            sys.stdout.write(message)
            self.revert()
            return False
        except KeyboardInterrupt:
            sys.stderr.write('Interrupted. Reverting last modifications.\n')
            self.revert()
            raise
        except Exception:
            sys.stderr.write(
                'Unexpected exception. Reverting last modifications.\n'
                )
            self.revert()
            raise

        # Every handler above returns or re-raises, so reaching this
        # point means the test command succeeded.
        self.commit()
        if chatty:
            sys.stdout.write('The bug remains. Keeping modifications.\n')
        else:
            # In quiet mode the modification was not described up
            # front, so describe it now.
            sys.stdout.write(
                'The bug remains after the following modifications:\n'
                )
            self.output(sys.stdout, ' ')
        return True

    def get_submodifications(self, success):
        """Return a generator or iterable of follow-up modifications.

        These are the modifications that should be tried after this
        one.  SUCCESS tells whether this modification was successful.
        The base implementation has none."""

        return []

    def output(self, f, prefix=''):
        """Write a description of this modification to F.

        Implementations in this file start each line with PREFIX."""

        raise NotImplementedError()

    def __repr__(self):
        return str(self)
class EmptyModificationListException(Exception):
    """Raised when a CompoundModification is built from no modifications."""
class SplitModification(Modification):
    """Holds two modifications split out of a failing modification.

    Because the original modification failed, it is known that
    mod1+mod2 cannot succeed.  Only mod1 is applied by this object; if
    it succeeds, mod2 itself is skipped and only mod2's
    submodifications are offered for further attempts."""

    def __init__(self, mod1, mod2):
        # Keep the larger of the two as mod1, the one that is applied.
        if mod1.get_size() < mod2.get_size():
            mod1, mod2 = mod2, mod1

        self.mod1 = mod1
        self.mod2 = mod2

    def get_size(self):
        """Return the size of mod1, the only part applied by this object."""

        return self.mod1.get_size()

    def modify(self):
        self.mod1.modify()

    def revert(self):
        self.mod1.revert()

    def commit(self):
        self.mod1.commit()

    def get_submodifications(self, success):
        """Yield what should be tried after mod1 has been attempted.

        If mod1 failed, mod2 is still worth trying as a whole.  If
        mod1 succeeded, mod2 as a whole is known to fail, so only its
        pieces are offered.  mod1's own submodifications follow in
        either case."""

        if not success:
            yield self.mod2
        else:
            for sub in self.mod2.get_submodifications(False):
                yield sub

        for sub in self.mod1.get_submodifications(success):
            yield sub

    def output(self, f, prefix=''):
        self.mod1.output(f, prefix=prefix)

    def __str__(self):
        return 'SplitModification(%s, %s)' % (self.mod1, self.mod2,)
class CompoundModification(Modification):
    """A non-empty sequence of modifications handled as a single unit."""

    def __init__(self, modifications):
        """Wrap MODIFICATIONS, a non-empty sequence of Modification objects.

        Raise EmptyModificationListException if MODIFICATIONS is
        empty.  The size of the compound is the sum of the sizes of
        its members, computed once here."""

        if not modifications:
            raise EmptyModificationListException()
        self.modifications = modifications
        self.size = sum(mod.get_size() for mod in self.modifications)

    def get_size(self):
        return self.size

    def modify(self):
        """Apply every member modification, in list order."""

        for modification in self.modifications:
            modification.modify()

    def revert(self):
        """Undo every member modification, last-applied first.

        Unwinding in the opposite order of modify() keeps the revert
        correct even if a later member depends on the state left by an
        earlier one."""

        for modification in reversed(self.modifications):
            modification.revert()

    def commit(self):
        """Make every member modification permanent, in list order."""

        for modification in self.modifications:
            modification.commit()

    def get_submodifications(self, success):
        """Yield smaller modifications to try after this one.

        Nothing is yielded after a success.  After a failure, a
        single-member compound defers to that member's own
        submodifications; a larger one is split into two halves
        wrapped in a SplitModification."""

        if success:
            # All modifications were completed successfully; no need
            # to try subsets:
            pass
        elif len(self.modifications) == 1:
            # Our modification list cannot be subdivided, but maybe
            # the remaining modification can:
            for mod in self.modifications[0].get_submodifications(False):
                yield mod
        else:
            # Create subsets of each half of the list and put them in
            # a SplitModification:
            n = len(self.modifications) // 2
            yield SplitModification(
                create_modification(self.modifications[:n]),
                create_modification(self.modifications[n:])
                )

    def output(self, f, prefix=''):
        for modification in self.modifications:
            modification.output(f, prefix=prefix)

    def __str__(self):
        return str(self.modifications)
def create_modification(mods):
    """Build one Modification out of the iterable MODS.

    A single element is returned as is; anything else is wrapped in a
    CompoundModification, which raises
    EmptyModificationListException when MODS is empty."""

    candidates = list(mods)
    if len(candidates) != 1:
        return CompoundModification(candidates)
    return candidates[0]
def compute_dir_size(path):
    """Return an estimate of the number of bytes under directory PATH.

    The estimate is the sum of the sizes of all regular files found
    recursively, plus a flat 100 for each directory (PATH included).
    Entries that are neither a directory nor a regular file according
    to os.path are ignored."""

    # Add a little bit for the directory itself.  A plain int literal
    # is used (not a Python-2-only long literal); Python 2 promotes to
    # long automatically if the total ever needs it.
    size = 100
    for filename in os.listdir(path):
        subpath = os.path.join(path, filename)
        if os.path.isdir(subpath):
            size += compute_dir_size(subpath)
        elif os.path.isfile(subpath):
            size += os.path.getsize(subpath)

    return size
class DeleteDirectoryModification(Modification):
    """Delete a whole directory tree from the repository."""

    def __init__(self, path):
        """Prepare to delete directory PATH; its size is measured now."""

        self.path = path
        self.size = compute_dir_size(self.path)

    def get_size(self):
        return self.size

    def modify(self):
        """Move the directory aside to a temporary name."""

        stash = get_tmp_filename()
        self.tempfile = stash
        shutil.move(self.path, stash)

    def revert(self):
        """Move the stashed directory back to its original location."""

        shutil.move(self.tempfile, self.path)
        self.tempfile = None

    def commit(self):
        """Remove the stashed directory tree for good."""

        shutil.rmtree(self.tempfile)
        self.tempfile = None

    def get_submodifications(self, success):
        """Yield smaller deletions to try if the whole directory must stay.

        After a failure, first a modification covering all immediate
        subdirectories is yielded (if there are any), then one
        covering all files directly in the directory (if there are
        any).  After a success nothing is yielded."""

        if success:
            # The whole directory could be deleted; no need to recurse:
            return

        # Try deleting subdirectories:
        subdir_mods = []
        for subdir in get_dirs(self.path):
            subdir_mods.append(DeleteDirectoryModification(subdir))
        if subdir_mods:
            yield create_modification(subdir_mods)

        # Try deleting files:
        file_mods = []
        for filename in get_files(self.path):
            file_mods.append(DeleteFileModification(filename))
        if file_mods:
            yield create_modification(file_mods)

    def output(self, f, prefix=''):
        f.write('%sDeleted directory %r\n' % (prefix, self.path,))

    def __str__(self):
        return 'DeleteDirectory(%r)' % self.path
class DeleteFileModification(Modification):
    """Delete a single file from the repository."""

    def __init__(self, path):
        """Prepare to delete the file PATH; its size is read now."""

        self.path = path
        self.size = os.path.getsize(self.path)

    def get_size(self):
        return self.size

    def modify(self):
        """Move the file aside to a temporary name."""

        stash = get_tmp_filename()
        self.tempfile = stash
        shutil.move(self.path, stash)

    def revert(self):
        """Move the stashed file back to its original location."""

        shutil.move(self.tempfile, self.path)
        self.tempfile = None

    def commit(self):
        """Remove the stashed file for good."""

        os.remove(self.tempfile)
        self.tempfile = None

    def output(self, f, prefix=''):
        f.write('%sDeleted file %r\n' % (prefix, self.path,))

    def __str__(self):
        return 'DeleteFile(%r)' % self.path
def rev_tuple(revision):
    """Return the dotted number string REVISION as a tuple of ints.

    Components equal to zero are dropped, so '1.2.0.4' becomes
    (1, 2, 4).  NOTE(review): this presumably exists to normalize
    CVS-style magic branch numbers; confirm against the RCS parser.

    A ValueError from int() propagates if a component is not a decimal
    integer."""

    # Zeros are filtered out here, which made the former separate
    # "delete a zero in the second-to-last position" step unreachable
    # (and an IndexError on inputs with fewer than two components).
    numbers = [int(s) for s in revision.split('.')]
    return tuple([n for n in numbers if n])
class RCSFileFilter:
    """Base class for a transformation of the text of an RCS file.

    Subclasses implement get_size(), get_filter_sink() and output()."""

    def get_size(self):
        """Return the estimated size of the change made by this filter."""

        raise NotImplementedError()

    def get_filter_sink(self, sink):
        """Return a parser sink that filters events and forwards to SINK."""

        raise NotImplementedError()

    def filter(self, text):
        """Return TEXT (the contents of an RCS file) after filtering.

        TEXT is parsed, the events are routed through the sink from
        get_filter_sink(), and whatever WriteRCSFileSink writes is
        returned."""

        buf = StringIO()
        writer = WriteRCSFileSink(buf)
        filtering_sink = self.get_filter_sink(writer)
        parse(StringIO(text), filtering_sink)
        return buf.getvalue()

    def get_subfilters(self):
        """Return the filters to try if this one fails; none by default."""

        return []

    def output(self, f, prefix=''):
        """Write a description of this filter to F."""

        raise NotImplementedError()
class DeleteTagRCSFileFilter(RCSFileFilter):
    """An RCSFileFilter that drops one symbol definition by name."""

    class Sink(FilterSink):
        """Forward everything to the wrapped sink except the tag TAGNAME."""

        def __init__(self, sink, tagname):
            FilterSink.__init__(self, sink)
            self.tagname = tagname

        def define_tag(self, name, revision):
            if name == self.tagname:
                # Swallow the definition of the tag being deleted.
                return
            FilterSink.define_tag(self, name, revision)

    def __init__(self, tagname):
        self.tagname = tagname

    def get_size(self):
        # A fixed estimate; the same value is used for every tag.
        return 50

    def get_filter_sink(self, sink):
        return self.Sink(sink, self.tagname)

    def output(self, f, prefix=''):
        f.write('%sDeleted tag %r\n' % (prefix, self.tagname,))
def get_tag_set(path):
    """Return the set of symbol names in the RCS file PATH to try deleting.

    The set contains every symbol whose normalized revision number
    (see rev_tuple()) has an even number of components, plus every
    symbol with an odd number of components for which no revision was
    seen on the corresponding branch."""

    class TagCollector(Sink):
        def __init__(self):
            self.tags = set()

            # A map { branch_tuple : name } for branches on which no
            # revisions have yet been seen:
            self.branches = {}

        def define_tag(self, name, revision):
            number = rev_tuple(revision)
            if len(number) % 2:
                # Odd length: a branch.  Remember it until a revision
                # turns up on it.
                self.branches[number] = name
            else:
                # Even length: a tag (as opposed to a branch).
                self.tags.add(name)

        def define_revision(
            self, revision, timestamp, author, state, branches, next
            ):
            # The branch holding this revision is not empty, so it no
            # longer qualifies as a deletable tag.
            self.branches.pop(rev_tuple(revision)[:-1], None)

        def get_tags(self):
            tags = self.tags
            tags.update(self.branches.values())
            return tags

    collector = TagCollector()
    f = open(path, 'rb')
    try:
        parse(f, collector)
    finally:
        f.close()
    return collector.get_tags()
class DeleteBranchTreeRCSFileFilter(RCSFileFilter):
    """An RCSFileFilter that removes a branch and everything below it."""

    class Sink(FilterSink):
        """Drop every event whose revision number starts with BRANCH_REV."""

        def __init__(self, sink, branch_rev):
            FilterSink.__init__(self, sink)
            self.branch_rev = branch_rev

        def is_on_branch(self, revision):
            """Return True iff REVISION has self.branch_rev as a prefix."""

            prefix_length = len(self.branch_rev)
            return rev_tuple(revision)[:prefix_length] == self.branch_rev

        def define_tag(self, name, revision):
            if self.is_on_branch(revision):
                return
            FilterSink.define_tag(self, name, revision)

        def define_revision(
            self, revision, timestamp, author, state, branches, next
            ):
            if self.is_on_branch(revision):
                return
            # Surviving revisions must not point at deleted branches.
            kept_branches = []
            for branch in branches:
                if not self.is_on_branch(branch):
                    kept_branches.append(branch)
            FilterSink.define_revision(
                self, revision, timestamp, author, state, kept_branches, next
                )

        def set_revision_info(self, revision, log, text):
            if self.is_on_branch(revision):
                return
            FilterSink.set_revision_info(self, revision, log, text)

    def __init__(self, branch_rev, subbranch_tree):
        """BRANCH_REV is the branch's revtuple; SUBBRANCH_TREE has the
        [(branch_rev, subbranch_tree), ...] form consumed by
        get_subfilters()."""

        self.branch_rev = branch_rev
        self.subbranch_tree = subbranch_tree

    def get_size(self):
        # A fixed estimate; the same value is used for every branch.
        return 100

    def get_filter_sink(self, sink):
        return self.Sink(sink, self.branch_rev)

    def get_subfilters(self):
        """Yield one filter per direct sub-branch of this branch."""

        for (sub_rev, sub_tree) in self.subbranch_tree:
            yield DeleteBranchTreeRCSFileFilter(sub_rev, sub_tree)

    def output(self, f, prefix=''):
        dotted = '.'.join([str(part) for part in self.branch_rev])
        f.write('%sDeleted branch %s\n' % (prefix, dotted,))
def get_branch_tree(path):
    """Return the forest of branches found in the RCS file PATH.

    The result has the form [(branch_revision, [sub_branch, ...]),
    ...], where branch_revision is a revtuple and each sub_branch has
    the same form as an element of the result."""

    class BranchCollector(Sink):
        def __init__(self):
            # A map { parent_branch_tuple : [child_branch_tuple, ...] }
            self.branches = {}

        def define_revision(
            self, revision, timestamp, author, state, branches, next
            ):
            parent = rev_tuple(revision)[:-1]
            if len(parent) == 1:
                # Every one-component parent is filed under (1,).
                parent = (1,)
            children = self.branches.setdefault(parent, [])
            children.extend(rev_tuple(branch)[:-1] for branch in branches)

        def _get_subbranches(self, parent):
            # Entries are removed as they are consumed so that
            # get_branches() can check nothing was left behind.
            children = self.branches.pop(parent, None)
            if children is None:
                return []
            subtree = []
            for child in children:
                subtree.append((child, self._get_subbranches(child),))
            return subtree

        def get_branches(self):
            forest = self._get_subbranches((1,))
            assert not self.branches
            return forest

    collector = BranchCollector()
    f = open(path, 'rb')
    try:
        parse(f, collector)
    finally:
        f.close()
    return collector.get_branches()
class RCSFileModification(Modification):
    """A Modification that involves changing the contents of an RCS file."""

    def __init__(self, path, filters):
        """PATH is the RCS file; FILTERS is a list of RCSFileFilter
        objects, applied in order.  The list is copied.  The size is
        the sum of the filters' sizes."""

        self.path = path
        self.filters = filters[:]
        self.size = 0
        for rcs_filter in self.filters:
            self.size += rcs_filter.get_size()

    def get_size(self):
        return self.size

    def modify(self):
        """Rewrite the file through the filters, stashing the original.

        The filtered text is computed before anything on disk is
        touched.  If a filter (or the parser behind it) raises, the
        exception propagates with the repository file still in place,
        instead of stranded in the temporary directory."""

        f = open(self.path, 'rb')
        try:
            text = f.read()
        finally:
            f.close()

        for rcs_filter in self.filters:
            text = rcs_filter.filter(text)

        # Only now move the original aside and write the replacement.
        self.tempfile = get_tmp_filename()
        shutil.move(self.path, self.tempfile)

        f = open(self.path, 'wb')
        try:
            f.write(text)
        finally:
            f.close()

    def revert(self):
        """Put the stashed original back, overwriting the filtered file."""

        shutil.move(self.tempfile, self.path)
        self.tempfile = None

    def commit(self):
        """Discard the stashed original."""

        os.remove(self.tempfile)
        self.tempfile = None

    def get_submodifications(self, success):
        """Yield smaller modifications of the same file to try next.

        Nothing is yielded after a success.  After a failure with a
        single filter, that filter's subfilters (if any) are tried as
        one modification; with several filters the list is halved into
        a SplitModification."""

        if success:
            # All filters completed successfully; no need to try
            # subsets:
            pass
        elif len(self.filters) == 1:
            # The last filter failed; see if it has any subfilters:
            subfilters = list(self.filters[0].get_subfilters())
            if subfilters:
                yield RCSFileModification(self.path, subfilters)
        else:
            n = len(self.filters) // 2
            yield SplitModification(
                RCSFileModification(self.path, self.filters[:n]),
                RCSFileModification(self.path, self.filters[n:])
                )

    def output(self, f, prefix=''):
        f.write('%sModified file %r\n' % (prefix, self.path,))
        for rcs_filter in self.filters:
            rcs_filter.output(f, prefix=(prefix + ' '))

    def __str__(self):
        return 'RCSFileModification(%r)' % (self.filters,)
def try_modification_combinations(test_command, mods):
    """Try each modification in MODS, and their submodifications.

    The pending modification with the largest get_size() is always
    attempted next, via its try_mod(TEST_COMMAND).  Whatever its
    get_submodifications() then offers is added to the pending list.

    Return True if any modification was successful, False otherwise
    (including when MODS is empty)."""

    # The modifications that should still be tried:
    todo = list(mods)
    any_success = False

    while todo:
        # Re-sort every round because new entries may have been added;
        # pop() then takes the largest.
        todo.sort(key=lambda mod: mod.get_size())
        mod = todo.pop()
        success = mod.try_mod(test_command)
        if success:
            any_success = True
        # Now add possible submodifications to the list of things to try:
        todo.extend(mod.get_submodifications(success))

    return any_success
def get_dirs(path):
    """Yield the paths of the immediate subdirectories of PATH.

    Entries are visited in sorted name order; each yielded value is
    PATH joined with the entry name."""

    for name in sorted(os.listdir(path)):
        candidate = os.path.join(path, name)
        if not os.path.isdir(candidate):
            continue
        yield candidate
def get_files(path, recurse=False):
    """Yield the paths of the regular files in directory PATH.

    Entries are visited in sorted name order.  If RECURSE is true,
    each subdirectory is descended into at the point where its name
    sorts, and the files found there are yielded as well."""

    for name in sorted(os.listdir(path)):
        candidate = os.path.join(path, name)
        if os.path.isfile(candidate):
            yield candidate
            continue
        if recurse and os.path.isdir(candidate):
            for nested in get_files(candidate, recurse=recurse):
                yield nested
def shrink_repository(test_command, cvsrepo):
    """Shrink the repository at CVSREPO while TEST_COMMAND keeps succeeding.

    Three passes are made, in this order: deleting directories and
    files, deleting branches from the remaining files, and deleting
    tags from the remaining files."""

    # Pass 1: try deleting directories (and, through the
    # submodifications, individual files).
    whole_repository = DeleteDirectoryModification(cvsrepo)
    try_modification_combinations(test_command, [whole_repository])

    # Pass 2: try deleting branches.
    branch_mods = []
    for path in get_files(cvsrepo, recurse=True):
        forest = get_branch_tree(path)
        if not forest:
            continue
        filters = [
            DeleteBranchTreeRCSFileFilter(branch_revision, subbranch_tree)
            for (branch_revision, subbranch_tree) in forest
            ]
        branch_mods.append(RCSFileModification(path, filters))
    if branch_mods:
        try_modification_combinations(test_command, branch_mods)

    # Pass 3: try deleting tags.
    tag_mods = []
    for path in get_files(cvsrepo, recurse=True):
        tags = sorted(get_tag_set(path))
        if not tags:
            continue
        filters = [DeleteTagRCSFileFilter(tag) for tag in tags]
        tag_mods.append(RCSFileModification(path, filters))
    if tag_mods:
        try_modification_combinations(test_command, tag_mods)
703 first_fail_message = """\
704 ERROR! The test command failed with the original repository. The
705 test command should be designed so that it succeeds (indicating that
706 the bug is still present) with the original repository, and fails only
707 after the bug disappears. Please fix your test command and start
708 again.
class MyHelpFormatter(optparse.IndentedHelpFormatter):
    """An optparse help formatter that leaves the description untouched."""

    def format_description(self, description):
        """Return DESCRIPTION exactly as given, without reformatting."""

        return description
def main():
    """Parse the command line and shrink the repository it names.

    The positional arguments are CVSREPO followed by TEST_COMMAND and
    its arguments.  Unless --skip-initial-test is given, TEST_COMMAND
    is first run against the untouched repository, and the script
    exits with status 1 if it fails there.  The scratch directory
    TMPDIR is created if needed and removed (best effort) at the
    end."""

    parser = optparse.OptionParser(
        usage=usage, description=description,
        formatter=MyHelpFormatter(),
        )
    parser.set_defaults(skip_initial_test=False)
    parser.add_option(
        '--skip-initial-test',
        action='store_true', default=False,
        help='skip verifying that the bug exists in the original repository',
        )

    (options, args) = parser.parse_args()

    # Both positional arguments are mandatory.  Report a usage error
    # instead of failing later with an IndexError or TypeError.
    if len(args) < 2:
        parser.error('CVSREPO and TEST_COMMAND are both required')

    cvsrepo = args[0]

    def test_command():
        command(*args[1:])

    if not os.path.isdir(tmpdir):
        os.makedirs(tmpdir)

    if not options.skip_initial_test:
        sys.stdout.write('Testing with the original repository.\n')
        try:
            test_command()
        except CommandFailedException:
            sys.stderr.write(first_fail_message)
            sys.exit(1)
        sys.stdout.write(
            'The bug is confirmed to exist in the initial repository.\n'
            )

    try:
        try:
            shrink_repository(test_command, cvsrepo)
        except KeyboardInterrupt:
            # Deliberately swallowed: an interrupt simply ends the
            # shrinking early; the cleanup below still runs.
            pass
    finally:
        try:
            os.rmdir(tmpdir)
        except Exception:
            # Fetch the exception this way so the syntax is valid on
            # every Python version.
            e = sys.exc_info()[1]
            sys.stderr.write('ERROR: %s (ignored)\n' % (e,))
764 if __name__ == '__main__':
765 main()