tools/bisect-perf-regression_test.py

   1 # Copyright 2014 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 import unittest
   6
   7 from auto_bisect import source_control as source_control_module
   8
   9 # Special import necessary because filename contains dash characters.
  10 bisect_perf_module = __import__('bisect-perf-regression')
  11
  12
  13 class BisectPerfRegressionTest(unittest.TestCase):
  14   """Test case for other functions and classes in bisect-perf-regression.py."""
  15
  16   def _AssertConfidence(self, score, bad_values, good_values):
  17     """Checks whether the given sets of values have a given confidence score.
  18
  19     The score represents our confidence that the two sets of values wouldn't
  20     be as different as they are just by chance; that is, that some real change
  21     occurred between the two sets of values.
  22
  23     Args:
  24       score: Expected confidence score.
  25       bad_values: First list of numbers.
  26       good_values: Second list of numbers.
  27     """
  28     # ConfidenceScore takes a list of lists but these lists are flattened.
  29     confidence = bisect_perf_module.ConfidenceScore([bad_values], [good_values])
  30     self.assertEqual(score, confidence)
  31
  32   def testConfidenceScore_ZeroConfidence(self):
  33     # The good and bad sets contain the same values, so the confidence that
  34     # they're different should be zero.
  35     self._AssertConfidence(0.0, [4, 5, 7, 6, 8, 7], [8, 7, 6, 7, 5, 4])
  36
  37   def testConfidenceScore_MediumConfidence(self):
  38     self._AssertConfidence(80.0, [0, 1, 1, 1, 2, 2], [1, 1, 1, 3, 3, 4])
  39
  40   def testConfidenceScore_HighConfidence(self):
  41     self._AssertConfidence(95.0, [0, 1, 1, 1, 2, 2], [1, 2, 2, 3, 3, 4])
  42
  43   def testConfidenceScore_VeryHighConfidence(self):
  44     # Confidence is high if the two sets of values have no internal variance.
  45     self._AssertConfidence(99.9, [1, 1, 1, 1], [1.2, 1.2, 1.2, 1.2])
  46     self._AssertConfidence(99.9, [1, 1, 1, 1], [1.01, 1.01, 1.01, 1.01])
  47
  48   def testConfidenceScore_ImbalancedSampleSize(self):
  49     # The second set of numbers only contains one number, so confidence is low.
  50     self._AssertConfidence(
  51         80.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3],[1.4])
  52
  53   def testConfidenceScore_EmptySample(self):
  54     # Confidence is zero if either or both samples are empty.
  55     self._AssertConfidence(0.0, [], [])
  56     self._AssertConfidence(0.0, [], [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3])
  57     self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [])
  58
  59   def testConfidenceScore_FunctionalTestResults(self):
  60     self._AssertConfidence(80.0, [1, 1, 0, 1, 1, 1, 0, 1], [0, 0, 1, 0, 1, 0])
  61     self._AssertConfidence(99.9, [1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0])
  62
  63   def testParseDEPSStringManually(self):
  64     """Tests DEPS parsing."""
  65     deps_file_contents = """
  66     vars = {
  67         'ffmpeg_hash':
  68              '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
  69         'webkit_url':
  70              'https://chromium.googlesource.com/chromium/blink.git',
  71         'git_url':
  72              'https://chromium.googlesource.com',
  73         'webkit_rev':
  74              '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
  75         'angle_revision':
  76              '74697cf2064c0a2c0d7e1b1b28db439286766a05'
  77     }"""
  78
  79     # Should only expect SVN/git revisions to come through, and URLs should be
  80     # filtered out.
  81     expected_vars_dict = {
  82         'ffmpeg_hash': '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
  83         'webkit_rev': '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
  84         'angle_revision': '74697cf2064c0a2c0d7e1b1b28db439286766a05'
  85     }
  86     # Testing private function.
  87     # pylint: disable=W0212
  88     vars_dict = bisect_perf_module._ParseRevisionsFromDEPSFileManually(
  89         deps_file_contents)
  90     self.assertEqual(vars_dict, expected_vars_dict)
  91
  92   def _AssertParseResult(self, expected_values, result_string):
  93     """Asserts some values are parsed from a RESULT line."""
  94     results_template = ('RESULT other_chart: other_trace= 123 count\n'
  95                         'RESULT my_chart: my_trace= %(value)s\n')
  96     results = results_template % {'value': result_string}
  97     metric = ['my_chart', 'my_trace']
  98     # Testing private function.
  99     # pylint: disable=W0212
 100     values = bisect_perf_module._TryParseResultValuesFromOutput(metric, results)
 101     self.assertEqual(expected_values, values)
 102
 103   def testTryParseResultValuesFromOutput_WithSingleValue(self):
 104     """Tests result pattern <*>RESULT <graph>: <trace>= <value>"""
 105     self._AssertParseResult([66.88], '66.88 kb')
 106     self._AssertParseResult([66.88], '66.88 ')
 107     self._AssertParseResult([-66.88], '-66.88 kb')
 108     self._AssertParseResult([66], '66 kb')
 109     self._AssertParseResult([0.66], '.66 kb')
 110     self._AssertParseResult([], '. kb')
 111     self._AssertParseResult([], 'aaa kb')
 112
 113   def testTryParseResultValuesFromOutput_WithMultiValue(self):
 114     """Tests result pattern <*>RESULT <graph>: <trace>= [<value>,<value>, ..]"""
 115     self._AssertParseResult([66.88], '[66.88] kb')
 116     self._AssertParseResult([66.88, 99.44], '[66.88, 99.44]kb')
 117     self._AssertParseResult([66.88, 99.44], '[ 66.88, 99.44 ]')
 118     self._AssertParseResult([-66.88, 99.44], '[-66.88, 99.44] kb')
 119     self._AssertParseResult([-66, 99], '[-66,99] kb')
 120     self._AssertParseResult([-66, 99], '[-66,99,] kb')
 121     self._AssertParseResult([-66, 0.99], '[-66,.99] kb')
 122     self._AssertParseResult([], '[] kb')
 123     self._AssertParseResult([], '[-66,abc] kb')
 124
 125   def testTryParseResultValuesFromOutputWithMeanStd(self):
 126     """Tests result pattern <*>RESULT <graph>: <trace>= {<mean, std}"""
 127     self._AssertParseResult([33.22], '{33.22, 3.6} kb')
 128     self._AssertParseResult([33.22], '{33.22, 3.6} kb')
 129     self._AssertParseResult([33.22], '{33.22,3.6}kb')
 130     self._AssertParseResult([33.22], '{33.22,3.6} kb')
 131     self._AssertParseResult([33.22], '{ 33.22,3.6 }kb')
 132     self._AssertParseResult([-33.22], '{-33.22,3.6}kb')
 133     self._AssertParseResult([22], '{22,6}kb')
 134     self._AssertParseResult([.22], '{.22,6}kb')
 135     self._AssertParseResult([], '{.22,6, 44}kb')
 136     self._AssertParseResult([], '{}kb')
 137     self._AssertParseResult([], '{XYZ}kb')
 138
 139   def _AssertCompatibleCommand(
 140       self, expected_command, original_command, revision, target_platform):
 141     """Tests the modification of the command that might be done.
 142
 143     This modification to the command is done in order to get a Telemetry
 144     command that works; before some revisions, the browser name that Telemetry
 145     expects is different in some cases, but we want it to work anyway.
 146
 147     Specifically, only for android:
 148       After r276628, only android-chrome-shell works.
 149       Prior to r274857, only android-chromium-testshell works.
 150       In the range [274857, 276628], both work.
 151     """
 152     bisect_options = bisect_perf_module.BisectOptions()
 153     bisect_options.output_buildbot_annotations = None
 154     source_control = source_control_module.DetermineAndCreateSourceControl(
 155         bisect_options)
 156     bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
 157         source_control, bisect_options)
 158     bisect_instance.opts.target_platform = target_platform
 159     git_revision = bisect_instance.source_control.ResolveToRevision(
 160         revision, 'chromium', bisect_perf_module.DEPOT_DEPS_NAME, 100)
 161     depot = 'chromium'
 162     command = bisect_instance.GetCompatibleCommand(
 163         original_command, git_revision, depot)
 164     self.assertEqual(expected_command, command)
 165
 166   def testGetCompatibleCommand_ChangeToTestShell(self):
 167     # For revisions <= r274857, only android-chromium-testshell is used.
 168     self._AssertCompatibleCommand(
 169         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 170         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 171         274857, 'android')
 172
 173   def testGetCompatibleCommand_ChangeToShell(self):
 174     # For revisions >= r276728, only android-chrome-shell can be used.
 175     self._AssertCompatibleCommand(
 176         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 177         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 178         276628, 'android')
 179
 180   def testGetCompatibleCommand_NoChange(self):
 181     # For revisions < r276728, android-chromium-testshell can be used.
 182     self._AssertCompatibleCommand(
 183         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 184         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 185         274858, 'android')
 186     # For revisions > r274857, android-chrome-shell can be used.
 187     self._AssertCompatibleCommand(
 188         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 189         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 190         274858, 'android')
 191
 192   def testGetCompatibleCommand_NonAndroidPlatform(self):
 193     # In most cases, there's no need to change Telemetry command.
 194     # For revisions >= r276728, only android-chrome-shell can be used.
 195     self._AssertCompatibleCommand(
 196         'tools/perf/run_benchmark -v --browser=release foo',
 197         'tools/perf/run_benchmark -v --browser=release foo',
 198         276628, 'chromium')
 199
 200   # This method doesn't reference self; it fails if an error is thrown.
 201   # pylint: disable=R0201
 202   def testDryRun(self):
 203     """Does a dry run of the bisect script.
 204
 205     This serves as a smoke test to catch errors in the basic execution of the
 206     script.
 207     """
 208     options_dict = {
 209       'debug_ignore_build': True,
 210       'debug_ignore_sync': True,
 211       'debug_ignore_perf_test': True,
 212       'command': 'fake_command',
 213       'metric': 'fake/metric',
 214       'good_revision': 280000,
 215       'bad_revision': 280005,
 216     }
 217     bisect_options = bisect_perf_module.BisectOptions.FromDict(options_dict)
 218     source_control = source_control_module.DetermineAndCreateSourceControl(
 219         bisect_options)
 220     bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
 221         source_control, bisect_options)
 222     results = bisect_instance.Run(bisect_options.command,
 223                                   bisect_options.bad_revision,
 224                                   bisect_options.good_revision,
 225                                   bisect_options.metric)
 226     bisect_instance.FormatAndPrintResults(results)
 227
 228
# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
  unittest.main()