Use stdin redirection, not a pipe, when showing how to run git fast-import.
[cvs2svn.git] / contrib / shrink_test_case.py
blobfb6a6664bb2e775725144602391423a52c16a6d8
1 #! /usr/bin/python
3 # (Be in -*- python -*- mode.)
5 # ====================================================================
6 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
8 # This software is licensed as described in the file COPYING, which
9 # you should have received as part of this distribution. The terms
10 # are also available at http://subversion.tigris.org/license-1.html.
11 # If newer versions of this license are posted there, you may use a
12 # newer version instead, at your option.
14 # This software consists of voluntary contributions made by many
15 # individuals. For exact contribution history, see the revision
16 # history and logs, available at http://cvs2svn.tigris.org/.
17 # ====================================================================
19 """Shrink a test case as much as possible.
21 !!!!!!! WARNING !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
22 !! This script irretrievably destroys the CVS repository that it is !!
23 !! applied to! !!
24 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
26 This script is meant to be used to shrink the size of a CVS repository
27 that is to be used as a test case for cvs2svn. It tries to throw out
28 parts of the repository while preserving the bug.
30 CVSREPO should be the path of a copy of a CVS archive. TEST_COMMAND
31 is a command that should run successfully (i.e., with exit code '0')
32 if the bug is still present, and fail if the bug is absent."""
35 import sys
36 import os
37 import shutil
38 import optparse
39 from cStringIO import StringIO
41 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
43 from cvs2svn_lib.key_generator import KeyGenerator
45 import cvs2svn_rcsparse
48 from contrib.rcs_file_filter import WriteRCSFileSink
49 from contrib.rcs_file_filter import FilterSink
# Usage line handed to optparse ('%prog' is optparse's program-name
# placeholder).
usage = 'USAGE: %prog [options] CVSREPO TEST_COMMAND'

# Help text for --help.  main() installs MyHelpFormatter, which emits
# this text without reformatting it.
description = """\
Simplify a CVS repository while preserving the presence of a bug.

***THE CVS REPOSITORY WILL BE DESTROYED***

CVSREPO is the path to a CVS repository.

TEST_COMMAND is a command that runs successfully (i.e., with exit
code '0') if the bug is still present, and fails if the bug is
absent.
"""

# Verbosity threshold consulted by command() (>= 2: trace each command)
# and Modification.try_mod() (>= 1: progress messages).
verbose = 1

# Directory that holds files and directories moved aside while a
# modification is being tested; main() creates and removes it.
tmpdir = 'shrink_test_case-tmp'
# Supplies the sequence numbers embedded in temporary file names.
# NOTE(review): presumably the argument 1 is the first id handed out --
# confirm against cvs2svn_lib.key_generator.
file_key_generator = KeyGenerator(1)


def get_tmp_filename():
    """Return a not-yet-used path of the form TMPDIR/fNNNNNNN.tmp.

    The number comes from file_key_generator, so each call yields a
    different name.  Nothing is created on disk."""

    basename = 'f%07d.tmp' % file_key_generator.gen_id()
    return os.path.join(tmpdir, basename)
class CommandFailedException(Exception):
    """Raised by command() when the program it ran reports failure."""
def command(cmd, *args):
    """Run CMD with arguments ARGS and wait for it to finish.

    The program is started with os.spawnlp().  If the value returned
    by os.spawnlp() is nonzero, raise CommandFailedException whose
    message is the space-joined command line.  When verbose >= 2, the
    command and its outcome are traced on stderr."""

    if verbose >= 2:
        sys.stderr.write('Running: %s %s...' % (cmd, ' '.join(args),))

    status = os.spawnlp(os.P_WAIT, cmd, cmd, *args)

    if not status:
        if verbose >= 2:
            sys.stderr.write('succeeded.\n')
        return

    if verbose >= 2:
        sys.stderr.write('failed (%s).\n' % status)
    raise CommandFailedException(' '.join([cmd] + list(args)))
class Modification:
    """Interface for a change to the repository that can be undone.

    Subclasses provide get_size(), modify(), revert(), commit() and
    output(); try_mod() drives them around a run of the test command."""

    def get_size(self):
        """Return an estimate of how much this modification saves.

        The value should be roughly the number of bytes by which the
        problem shrinks if the modification is kept.  It determines
        the order in which modifications are attempted."""

        raise NotImplementedError()

    def modify(self):
        """Apply the change to the repository.

        Enough information must be kept for revert() to undo it."""

        raise NotImplementedError()

    def revert(self):
        """Undo the change made by modify()."""

        raise NotImplementedError()

    def commit(self):
        """Make the change made by modify() permanent."""

        raise NotImplementedError()

    def try_mod(self, test_command):
        """Apply this modification and keep it only if the bug survives.

        TEST_COMMAND is called with no arguments after modify().  If
        it raises CommandFailedException the modification is reverted
        and False is returned.  If it raises anything else (including
        KeyboardInterrupt) the modification is reverted and the
        exception propagates.  Otherwise the modification is committed
        and True is returned."""

        if verbose >= 1:
            sys.stdout.write('Testing with the following modifications:\n')
            self.output(sys.stdout, ' ')

        self.modify()
        try:
            test_command()
        except CommandFailedException:
            if verbose >= 1:
                report = 'The bug disappeared. Reverting modifications.\n'
            else:
                report = 'Attempted modification unsuccessful.\n'
            sys.stdout.write(report)
            self.revert()
            return False
        except KeyboardInterrupt:
            sys.stderr.write('Interrupted. Reverting last modifications.\n')
            self.revert()
            raise
        except Exception:
            sys.stderr.write(
                'Unexpected exception. Reverting last modifications.\n'
                )
            self.revert()
            raise

        # Every handler above returns or re-raises, so getting here
        # means that test_command() completed without an exception.
        self.commit()
        if verbose >= 1:
            sys.stdout.write('The bug remains. Keeping modifications.\n')
        else:
            # Nothing was printed before the test in this mode, so
            # describe the modification now.
            sys.stdout.write(
                'The bug remains after the following modifications:\n'
                )
            self.output(sys.stdout, ' ')
        return True

    def get_submodifications(self, success):
        """Return a generator or iterable of follow-up modifications.

        These are the modifications that should be tried after this
        modification.  SUCCESS tells whether this modification was
        kept.  The base implementation has none to offer."""

        return []

    def output(self, f, prefix=''):
        """Write a description of this modification to F.

        Each line written should start with PREFIX."""

        raise NotImplementedError()

    def __repr__(self):
        # Subclasses define __str__(); reuse it for repr().
        return str(self)
class EmptyModificationListException(Exception):
    """Raised when a CompoundModification is built from an empty list."""
class SplitModification(Modification):
    """A pair of modifications split out of one that failed.

    Since the combined modification failed, it is known that
    mod1+mod2 together cannot succeed.  Hence only mod1 is applied
    by this object; if mod1 succeeds, mod2 as a whole is skipped
    (though its submodifications are still offered)."""

    def __init__(self, mod1, mod2):
        # Arrange for mod1 to be the larger of the two:
        if mod1.get_size() < mod2.get_size():
            mod1, mod2 = mod2, mod1

        self.mod1 = mod1
        self.mod2 = mod2

    def get_size(self):
        # Only mod1 is applied, so only its size counts.
        return self.mod1.get_size()

    def modify(self):
        self.mod1.modify()

    def revert(self):
        self.mod1.revert()

    def commit(self):
        self.mod1.commit()

    def get_submodifications(self, success):
        """Generate the modifications to try after mod1 has been tested."""

        if not success:
            # mod1 failed, so mod2 still deserves a try of its own.
            yield self.mod2
        else:
            # mod1 succeeded, so mod2 as a whole must fail; go
            # straight to its pieces.
            for piece in self.mod2.get_submodifications(False):
                yield piece

        for piece in self.mod1.get_submodifications(success):
            yield piece

    def output(self, f, prefix=''):
        self.mod1.output(f, prefix=prefix)

    def __str__(self):
        return 'SplitModification(%s, %s)' % (self.mod1, self.mod2,)
class CompoundModification(Modification):
    """A non-empty list of modifications that are applied as a unit."""

    def __init__(self, modifications):
        """Wrap the list MODIFICATIONS.

        Raise EmptyModificationListException if the list is empty.
        The list is stored as passed (not copied)."""

        if not modifications:
            raise EmptyModificationListException()
        self.modifications = modifications
        # The sizes are added up once, at construction time.
        self.size = sum([mod.get_size() for mod in modifications])

    def get_size(self):
        return self.size

    def modify(self):
        for member in self.modifications:
            member.modify()

    def revert(self):
        for member in self.modifications:
            member.revert()

    def commit(self):
        for member in self.modifications:
            member.commit()

    def get_submodifications(self, success):
        """Generate smaller modifications to try if this one failed."""

        if success:
            # Everything was applied successfully; there is nothing
            # smaller worth trying.
            return

        if len(self.modifications) == 1:
            # The list cannot be subdivided, but maybe its only
            # member can:
            for piece in self.modifications[0].get_submodifications(False):
                yield piece
            return

        # Wrap each half of the list in a modification of its own
        # and pair the halves in a SplitModification:
        middle = len(self.modifications) // 2
        yield SplitModification(
            create_modification(self.modifications[:middle]),
            create_modification(self.modifications[middle:])
            )

    def output(self, f, prefix=''):
        for member in self.modifications:
            member.output(f, prefix=prefix)

    def __str__(self):
        return str(self.modifications)
def create_modification(mods):
    """Return a single Modification standing for the iterable MODS.

    A lone element is returned as is; anything else is wrapped in a
    CompoundModification, which raises EmptyModificationListException
    if MODS turns out to be empty."""

    candidates = list(mods)
    if len(candidates) != 1:
        return CompoundModification(candidates)
    return candidates[0]
def compute_dir_size(path):
    """Return the estimated size in bytes of the directory tree at PATH.

    The estimate is the sum of the sizes of all regular files below
    PATH plus a fixed allowance of 100 for each directory (PATH
    included).  Entries that are neither files nor directories
    contribute nothing."""

    # Add a little bit for the directory itself.  (A plain integer is
    # enough; a Python-2-only long literal is not needed because
    # integers are promoted automatically.)
    size = 100
    for filename in os.listdir(path):
        subpath = os.path.join(path, filename)
        if os.path.isdir(subpath):
            size += compute_dir_size(subpath)
        elif os.path.isfile(subpath):
            size += os.path.getsize(subpath)

    return size
class DeleteDirectoryModification(Modification):
    """Delete a whole directory tree from the repository.

    While the deletion is being tested, the directory is parked
    under a temporary name so that it can be moved back."""

    def __init__(self, path):
        self.path = path
        # Measured once, while the directory is still in place.
        self.size = compute_dir_size(path)

    def get_size(self):
        return self.size

    def modify(self):
        parking_place = get_tmp_filename()
        self.tempfile = parking_place
        shutil.move(self.path, parking_place)

    def revert(self):
        shutil.move(self.tempfile, self.path)
        self.tempfile = None

    def commit(self):
        shutil.rmtree(self.tempfile)
        self.tempfile = None

    def get_submodifications(self, success):
        """Generate deletions of this directory's contents.

        Nothing is generated if the whole directory could be
        deleted.  Otherwise yield one modification covering all of
        the subdirectories, then one covering all of the files
        (each only if there are any)."""

        if success:
            # The whole directory is gone; no need to recurse.
            return

        # Try deleting subdirectories:
        subdir_mods = [
            DeleteDirectoryModification(subdir)
            for subdir in get_dirs(self.path)
            ]
        if subdir_mods:
            yield create_modification(subdir_mods)

        # Try deleting files:
        file_mods = [
            DeleteFileModification(filename)
            for filename in get_files(self.path)
            ]
        if file_mods:
            yield create_modification(file_mods)

    def output(self, f, prefix=''):
        f.write('%sDeleted directory %r\n' % (prefix, self.path,))

    def __str__(self):
        return 'DeleteDirectory(%r)' % self.path
class DeleteFileModification(Modification):
    """Delete a single file from the repository.

    While the deletion is being tested, the file is parked under a
    temporary name so that it can be moved back."""

    def __init__(self, path):
        self.path = path
        # Measured once, while the file is still in place.
        self.size = os.path.getsize(path)

    def get_size(self):
        return self.size

    def modify(self):
        parking_place = get_tmp_filename()
        self.tempfile = parking_place
        shutil.move(self.path, parking_place)

    def revert(self):
        shutil.move(self.tempfile, self.path)
        self.tempfile = None

    def commit(self):
        os.remove(self.tempfile)
        self.tempfile = None

    def output(self, f, prefix=''):
        f.write('%sDeleted file %r\n' % (prefix, self.path,))

    def __str__(self):
        return 'DeleteFile(%r)' % self.path
def rev_tuple(revision):
    """Convert the dotted revision string REVISION to a tuple of ints.

    Components equal to zero are left out, so that a branch symbol
    such as '1.2.0.4' becomes the odd-length tuple (1, 2, 4), whereas
    an ordinary revision such as '1.2' becomes (1, 2).

    Raise ValueError if a component is not an integer."""

    # Dropping the zeros here is all that is needed; a further check
    # for a zero in the next-to-last position could never fire (and
    # would fail on a single-component revision).
    components = [int(s) for s in revision.split('.')]
    return tuple([n for n in components if n])
class RCSFileFilter:
    """Interface for a transformation applied to the text of an RCS file.

    Subclasses provide get_size(), get_filter_sink() and output()."""

    def get_size(self):
        """Return the estimated size of this filter's effect."""

        raise NotImplementedError()

    def get_filter_sink(self, sink):
        """Return a parser sink that filters events on their way to SINK."""

        raise NotImplementedError()

    def filter(self, text):
        """Return the RCS file contents TEXT with this filter applied.

        TEXT is parsed with cvs2svn_rcsparse; the parse events go to
        the sink from get_filter_sink(), which is handed a
        WriteRCSFileSink that writes to an in-memory buffer.  The
        contents of that buffer are returned."""

        buf = StringIO()
        filter_sink = self.get_filter_sink(WriteRCSFileSink(buf))
        cvs2svn_rcsparse.parse(StringIO(text), filter_sink)
        return buf.getvalue()

    def get_subfilters(self):
        """Return the filters to try if this one fails (none by default)."""

        return []

    def output(self, f, prefix=''):
        """Write a description of this filter to F, prefixed by PREFIX."""

        raise NotImplementedError()
class DeleteTagRCSFileFilter(RCSFileFilter):
    """Filter that removes the definition of one symbol from an RCS file."""

    class Sink(FilterSink):
        """Sink that withholds define_tag() calls for one symbol name.

        NOTE(review): other events are presumably forwarded unchanged
        by FilterSink -- confirm in contrib.rcs_file_filter."""

        def __init__(self, sink, tagname):
            FilterSink.__init__(self, sink)
            self.tagname = tagname

        def define_tag(self, name, revision):
            if name == self.tagname:
                # This is the symbol being deleted; do not pass it on.
                return
            FilterSink.define_tag(self, name, revision)

    def __init__(self, tagname):
        self.tagname = tagname

    def get_size(self):
        # A fixed estimate, used for every symbol.
        return 50

    def get_filter_sink(self, sink):
        return self.Sink(sink, self.tagname)

    def output(self, f, prefix=''):
        f.write('%sDeleted tag %r\n' % (prefix, self.tagname,))
def get_tag_set(path):
    """Return the set of deletable symbol names in the RCS file PATH.

    The set holds the name of every symbol whose revision number has
    an even number of nonzero components (a tag), plus the name of
    every other symbol (a branch) for which no revision on that
    branch was reported by the parser."""

    class TagCollector(cvs2svn_rcsparse.Sink):
        def __init__(self):
            self.tags = set()

            # A map { branch_tuple : name } for branches on which no
            # revisions have yet been seen:
            self.branches = {}

        def define_tag(self, name, revision):
            revtuple = rev_tuple(revision)
            if len(revtuple) % 2 == 0:
                # This is a tag (as opposed to branch)
                self.tags.add(name)
            else:
                self.branches[revtuple] = name

        def define_revision(
            self, revision, timestamp, author, state, branches, next
            ):
            # A revision was seen on this branch, so the branch is
            # not empty; forget about it.
            branch = rev_tuple(revision)[:-1]
            try:
                del self.branches[branch]
            except KeyError:
                pass

        def get_tags(self):
            # Branches that are still recorded never had a revision;
            # they are reported along with the tags.
            tags = self.tags
            for branch in self.branches.values():
                tags.add(branch)
            return tags

    tag_collector = TagCollector()
    # Close the file explicitly instead of relying on garbage
    # collection; this function is called once per repository file.
    f = open(path, 'rb')
    try:
        cvs2svn_rcsparse.parse(f, tag_collector)
    finally:
        f.close()
    return tag_collector.get_tags()
class DeleteBranchTreeRCSFileFilter(RCSFileFilter):
    """Filter that removes a branch, and all that sprouts from it,
    from an RCS file."""

    class Sink(FilterSink):
        """Sink that withholds everything located on one branch."""

        def __init__(self, sink, branch_rev):
            FilterSink.__init__(self, sink)
            self.branch_rev = branch_rev

        def is_on_branch(self, revision):
            """Return True iff REVISION's tuple starts with the branch's."""

            prefix_length = len(self.branch_rev)
            return rev_tuple(revision)[:prefix_length] == self.branch_rev

        def define_tag(self, name, revision):
            if self.is_on_branch(revision):
                return
            FilterSink.define_tag(self, name, revision)

        def define_revision(
            self, revision, timestamp, author, state, branches, next
            ):
            if self.is_on_branch(revision):
                return
            # Keep the revision, but leave out any of its child
            # branches that are being deleted.
            surviving = [
                branch
                for branch in branches
                if not self.is_on_branch(branch)
                ]
            FilterSink.define_revision(
                self, revision, timestamp, author, state, surviving, next
                )

        def set_revision_info(self, revision, log, text):
            if self.is_on_branch(revision):
                return
            FilterSink.set_revision_info(self, revision, log, text)

    def __init__(self, branch_rev, subbranch_tree):
        """BRANCH_REV is the revision tuple of the branch to delete.

        SUBBRANCH_TREE is a list of (branch_rev, subbranch_tree)
        pairs describing the branches that sprout from it."""

        self.branch_rev = branch_rev
        self.subbranch_tree = subbranch_tree

    def get_size(self):
        # A fixed estimate, used for every branch.
        return 100

    def get_filter_sink(self, sink):
        return self.Sink(sink, self.branch_rev)

    def get_subfilters(self):
        """Generate one filter for each direct subbranch."""

        for (child_rev, child_tree) in self.subbranch_tree:
            yield DeleteBranchTreeRCSFileFilter(child_rev, child_tree)

    def output(self, f, prefix=''):
        dotted = '.'.join([str(s) for s in self.branch_rev])
        f.write('%sDeleted branch %s\n' % (prefix, dotted,))
def get_branch_tree(path):
    """Return the forest of branches in path.

    Return [(branch_revision, [sub_branch, ...]), ...], where
    branch_revision is a revtuple and sub_branch has the same form as
    the whole return value.

    """

    class BranchCollector(cvs2svn_rcsparse.Sink):
        def __init__(self):
            # A map { parent_branch_tuple : [child_branch_tuple, ...] }
            self.branches = {}

        def define_revision(
            self, revision, timestamp, author, state, branches, next
            ):
            parent = rev_tuple(revision)[:-1]
            if len(parent) == 1:
                # File every revision with a one-component parent
                # (e.g., 1.x and 2.x) under the same key.
                parent = (1,)
            entry = self.branches.setdefault(parent, [])
            for branch in branches:
                entry.append(rev_tuple(branch)[:-1])

        def _get_subbranches(self, parent):
            """Remove and return the tree of branches below PARENT."""

            retval = []
            try:
                branches = self.branches[parent]
            except KeyError:
                return []
            del self.branches[parent]
            for branch in branches:
                subbranches = self._get_subbranches(branch)
                retval.append((branch, subbranches,))
            return retval

        def get_branches(self):
            retval = self._get_subbranches((1,))
            # Every branch should have been reached from (1,):
            assert not self.branches
            return retval

    branch_collector = BranchCollector()
    # Close the file explicitly instead of relying on garbage
    # collection; this function is called once per repository file.
    f = open(path, 'rb')
    try:
        cvs2svn_rcsparse.parse(f, branch_collector)
    finally:
        f.close()
    return branch_collector.get_branches()
class RCSFileModification(Modification):
    """A Modification that involves changing the contents of an RCS file."""

    def __init__(self, path, filters):
        """PATH is the RCS file; FILTERS is a list of RCSFileFilters.

        The list is copied.  The size of the modification is the sum
        of the sizes of the filters."""

        self.path = path
        self.filters = filters[:]
        self.size = 0
        for filter in self.filters:
            self.size += filter.get_size()

    def get_size(self):
        return self.size

    def modify(self):
        """Replace the file by the result of applying all of the filters.

        The original is moved to a temporary name, from which
        revert() can restore it."""

        self.tempfile = get_tmp_filename()
        shutil.move(self.path, self.tempfile)

        f = open(self.tempfile, 'rb')
        try:
            text = f.read()
        finally:
            f.close()

        for filter in self.filters:
            text = filter.filter(text)

        # The file has to be closed (and thereby flushed) before the
        # test command looks at it, so do not leave that to the
        # garbage collector.
        f = open(self.path, 'wb')
        try:
            f.write(text)
        finally:
            f.close()

    def revert(self):
        shutil.move(self.tempfile, self.path)
        self.tempfile = None

    def commit(self):
        os.remove(self.tempfile)
        self.tempfile = None

    def get_submodifications(self, success):
        """Generate smaller modifications of the same file."""

        if success:
            # All filters completed successfully; no need to try
            # subsets:
            pass
        elif len(self.filters) == 1:
            # The last filter failed; see if it has any subfilters:
            subfilters = list(self.filters[0].get_subfilters())
            if subfilters:
                yield RCSFileModification(self.path, subfilters)
        else:
            # Try each half of the filters by itself:
            n = len(self.filters) // 2
            yield SplitModification(
                RCSFileModification(self.path, self.filters[:n]),
                RCSFileModification(self.path, self.filters[n:])
                )

    def output(self, f, prefix=''):
        f.write('%sModified file %r\n' % (prefix, self.path,))
        for filter in self.filters:
            filter.output(f, prefix=(prefix + ' '))

    def __str__(self):
        return 'RCSFileModification(%r)' % (self.filters,)
def try_modification_combinations(test_command, mods):
    """Try the modifications in MODS and their submodifications.

    TEST_COMMAND is passed to each modification's try_mod().  The
    pending modification with the largest get_size() is always tried
    next, and whatever its get_submodifications() offers is added to
    the pending ones.

    Return True if any modifications were successful, False
    otherwise (including when MODS is empty)."""

    # The modifications that should still be tried:
    todo = list(mods)
    any_success = False

    while todo:
        # Sort ascending so that pop() takes the largest one.
        todo.sort(key=lambda mod: mod.get_size())
        mod = todo.pop()
        success = mod.try_mod(test_command)
        if success:
            any_success = True
        # Now add possible submodifications to the list of things to try:
        todo.extend(mod.get_submodifications(success))

    return any_success
def get_dirs(path):
    """Generate the paths of the directories directly inside PATH.

    The paths include PATH as prefix and come in sorted order of
    the entry names."""

    for name in sorted(os.listdir(path)):
        candidate = os.path.join(path, name)
        if not os.path.isdir(candidate):
            continue
        yield candidate
def get_files(path, recurse=False):
    """Generate the paths of the regular files inside PATH.

    The paths include PATH as prefix.  Entries are visited in sorted
    order of their names.  If RECURSE is true, each subdirectory is
    descended into at the point where its name sorts."""

    for name in sorted(os.listdir(path)):
        entry = os.path.join(path, name)
        if os.path.isfile(entry):
            yield entry
            continue
        if recurse and os.path.isdir(entry):
            for nested in get_files(entry, recurse=recurse):
                yield nested
def shrink_repository(test_command, cvsrepo):
    """Shrink the repository at CVSREPO as long as the bug persists.

    TEST_COMMAND is the callable handed on to
    try_modification_combinations().  Three passes are made: delete
    directories and files, delete branches from the remaining files,
    and delete tags from the remaining files."""

    # Pass 1: try deleting directories and files, starting from the
    # repository root.
    whole_repo = DeleteDirectoryModification(cvsrepo)
    try_modification_combinations(test_command, [whole_repo])

    # Pass 2: try deleting branches:
    branch_mods = []
    for path in get_files(cvsrepo, recurse=True):
        branch_tree = get_branch_tree(path)
        if not branch_tree:
            continue
        branch_filters = [
            DeleteBranchTreeRCSFileFilter(branch_revision, subbranch_tree)
            for (branch_revision, subbranch_tree) in branch_tree
            ]
        branch_mods.append(RCSFileModification(path, branch_filters))
    if branch_mods:
        try_modification_combinations(test_command, branch_mods)

    # Pass 3: try deleting tags:
    tag_mods = []
    for path in get_files(cvsrepo, recurse=True):
        tags = sorted(get_tag_set(path))
        if not tags:
            continue
        tag_filters = [DeleteTagRCSFileFilter(tag) for tag in tags]
        tag_mods.append(RCSFileModification(path, tag_filters))
    if tag_mods:
        try_modification_combinations(test_command, tag_mods)
# Written to stderr by main() when the test command already fails
# before anything in the repository has been modified.
first_fail_message = """\
ERROR! The test command failed with the original repository. The
test command should be designed so that it succeeds (indicating that
the bug is still present) with the original repository, and fails only
after the bug disappears. Please fix your test command and start
again.
"""
class MyHelpFormatter(optparse.IndentedHelpFormatter):
    """An optparse help formatter that emits the description verbatim."""

    def format_description(self, description):
        # Hand the text back untouched so that its line breaks are
        # kept as written.
        return description
def main():
    """Parse the command line and shrink the repository named on it.

    The first positional argument is the CVS repository; the
    remaining ones form the test command.  Unless
    --skip-initial-test is given, the test command is first run
    against the unmodified repository, and the program exits with
    status 1 if it fails.  The temporary directory is removed at the
    end (best effort).  A keyboard interrupt during shrinking ends
    the run quietly."""

    parser = optparse.OptionParser(
        usage=usage, description=description,
        formatter=MyHelpFormatter(),
        )
    parser.add_option(
        '--skip-initial-test',
        action='store_true', default=False,
        help='skip verifying that the bug exists in the original repository',
        )

    (options, args) = parser.parse_args()

    # Complain about a bad command line via optparse (usage message,
    # exit status 2) rather than failing with a traceback further on.
    if len(args) < 2:
        parser.error('CVSREPO and TEST_COMMAND must both be specified')

    cvsrepo = args[0]
    if not os.path.isdir(cvsrepo):
        parser.error('CVSREPO %r is not a directory' % (cvsrepo,))

    def test_command():
        command(*args[1:])

    if not os.path.isdir(tmpdir):
        os.makedirs(tmpdir)

    if not options.skip_initial_test:
        sys.stdout.write('Testing with the original repository.\n')
        try:
            test_command()
        except CommandFailedException:
            sys.stderr.write(first_fail_message)
            sys.exit(1)
        sys.stdout.write(
            'The bug is confirmed to exist in the initial repository.\n'
            )

    try:
        try:
            shrink_repository(test_command, cvsrepo)
        except KeyboardInterrupt:
            pass
    finally:
        try:
            # Succeeds only if the directory is empty.
            os.rmdir(tmpdir)
        except Exception:
            # Fetch the exception this way because the syntax for
            # binding it differs between Python versions.
            e = sys.exc_info()[1]
            sys.stderr.write('ERROR: %s (ignored)\n' % (e,))


if __name__ == '__main__':
    main()