tools/bisect-perf-regression_test.py

   1 # Copyright 2014 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 import os
   6 import re
   7 import unittest
   8
   9 from auto_bisect import source_control as source_control_module
  10
  11 # Special import necessary because filename contains dash characters.
  12 bisect_perf_module = __import__('bisect-perf-regression')
  13
  14 def _GetBisectPerformanceMetricsInstance():
  15   """Returns an instance of the BisectPerformanceMetrics class."""
  16   options_dict = {
  17     'debug_ignore_build': True,
  18     'debug_ignore_sync': True,
  19     'debug_ignore_perf_test': True,
  20     'command': 'fake_command',
  21     'metric': 'fake/metric',
  22     'good_revision': 280000,
  23     'bad_revision': 280005,
  24   }
  25   bisect_options = bisect_perf_module.BisectOptions.FromDict(options_dict)
  26   source_control = source_control_module.DetermineAndCreateSourceControl(
  27       bisect_options)
  28   bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
  29       source_control, bisect_options)
  30   bisect_instance.src_cwd = os.path.abspath(
  31       os.path.join(os.path.dirname(__file__), os.path.pardir))
  32   return bisect_instance
  33
  34
  35 class BisectPerfRegressionTest(unittest.TestCase):
  36   """Test case for other functions and classes in bisect-perf-regression.py."""
  37
  38   def _AssertConfidence(self, score, bad_values, good_values):
  39     """Checks whether the given sets of values have a given confidence score.
  40
  41     The score represents our confidence that the two sets of values wouldn't
  42     be as different as they are just by chance; that is, that some real change
  43     occurred between the two sets of values.
  44
  45     Args:
  46       score: Expected confidence score.
  47       bad_values: First list of numbers.
  48       good_values: Second list of numbers.
  49     """
  50     # ConfidenceScore takes a list of lists but these lists are flattened
  51     # inside the function.
  52     confidence = bisect_perf_module.ConfidenceScore(
  53         [[v] for v in bad_values],
  54         [[v] for v in good_values])
  55     self.assertEqual(score, confidence)
  56
  57   def testConfidenceScore_ZeroConfidence(self):
  58     # The good and bad sets contain the same values, so the confidence that
  59     # they're different should be zero.
  60     self._AssertConfidence(0.0, [4, 5, 7, 6, 8, 7], [8, 7, 6, 7, 5, 4])
  61
  62   def testConfidenceScore_MediumConfidence(self):
  63     self._AssertConfidence(80.0, [0, 1, 1, 1, 2, 2], [1, 1, 1, 3, 3, 4])
  64
  65   def testConfidenceScore_HighConfidence(self):
  66     self._AssertConfidence(95.0, [0, 1, 1, 1, 2, 2], [1, 2, 2, 3, 3, 4])
  67
  68   def testConfidenceScore_VeryHighConfidence(self):
  69     # Confidence is high if the two sets of values have no internal variance.
  70     self._AssertConfidence(99.9, [1, 1, 1, 1], [1.2, 1.2, 1.2, 1.2])
  71     self._AssertConfidence(99.9, [1, 1, 1, 1], [1.01, 1.01, 1.01, 1.01])
  72
  73   def testConfidenceScore_UnbalancedSampleSize(self):
  74     # The second set of numbers only contains one number, so confidence is 0.
  75     self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2], [1.4])
  76
  77   def testConfidenceScore_EmptySample(self):
  78     # Confidence is zero if either or both samples are empty.
  79     self._AssertConfidence(0.0, [], [])
  80     self._AssertConfidence(0.0, [], [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3])
  81     self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [])
  82
  83   def testConfidenceScore_FunctionalTestResults(self):
  84     self._AssertConfidence(80.0, [1, 1, 0, 1, 1, 1, 0, 1], [0, 0, 1, 0, 1, 0])
  85     self._AssertConfidence(99.9, [1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0])
  86
  87   def testConfidenceScore_RealWorldCases(self):
  88     """This method contains a set of data from actual bisect results.
  89
  90     The confidence scores asserted below were all copied from the actual
  91     results, so the purpose of this test method is mainly to show what the
  92     results for real cases are, and compare when we change the confidence
  93     score function in the future.
  94     """
  95     self._AssertConfidence(80, [133, 130, 132, 132, 130, 129], [129, 129, 125])
  96     self._AssertConfidence(99.5, [668, 667], [498, 498, 499])
  97     self._AssertConfidence(80, [67, 68], [65, 65, 67])
  98     self._AssertConfidence(0, [514], [514])
  99     self._AssertConfidence(90, [616, 613, 607, 615], [617, 619, 619, 617])
 100     self._AssertConfidence(0, [3.5, 5.8, 4.7, 3.5, 3.6], [2.8])
 101     self._AssertConfidence(90, [3, 3, 3], [2, 2, 2, 3])
 102     self._AssertConfidence(0, [1999004, 1999627], [223355])
 103     self._AssertConfidence(90, [1040, 934, 961], [876, 875, 789])
 104     self._AssertConfidence(90, [309, 305, 304], [302, 302, 299, 303, 298])
 105
 106   def testParseDEPSStringManually(self):
 107     """Tests DEPS parsing."""
 108     deps_file_contents = """
 109     vars = {
 110         'ffmpeg_hash':
 111              '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
 112         'webkit_url':
 113              'https://chromium.googlesource.com/chromium/blink.git',
 114         'git_url':
 115              'https://chromium.googlesource.com',
 116         'webkit_rev':
 117              '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
 118         'angle_revision':
 119              '74697cf2064c0a2c0d7e1b1b28db439286766a05'
 120     }"""
 121
 122     # Should only expect SVN/git revisions to come through, and URLs should be
 123     # filtered out.
 124     expected_vars_dict = {
 125         'ffmpeg_hash': '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
 126         'webkit_rev': '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
 127         'angle_revision': '74697cf2064c0a2c0d7e1b1b28db439286766a05'
 128     }
 129     # Testing private function.
 130     # pylint: disable=W0212
 131     vars_dict = bisect_perf_module._ParseRevisionsFromDEPSFileManually(
 132         deps_file_contents)
 133     self.assertEqual(vars_dict, expected_vars_dict)
 134
 135   def _AssertParseResult(self, expected_values, result_string):
 136     """Asserts some values are parsed from a RESULT line."""
 137     results_template = ('RESULT other_chart: other_trace= 123 count\n'
 138                         'RESULT my_chart: my_trace= %(value)s\n')
 139     results = results_template % {'value': result_string}
 140     metric = ['my_chart', 'my_trace']
 141     # Testing private function.
 142     # pylint: disable=W0212
 143     values = bisect_perf_module._TryParseResultValuesFromOutput(metric, results)
 144     self.assertEqual(expected_values, values)
 145
 146   def testTryParseResultValuesFromOutput_WithSingleValue(self):
 147     """Tests result pattern <*>RESULT <graph>: <trace>= <value>"""
 148     self._AssertParseResult([66.88], '66.88 kb')
 149     self._AssertParseResult([66.88], '66.88 ')
 150     self._AssertParseResult([-66.88], '-66.88 kb')
 151     self._AssertParseResult([66], '66 kb')
 152     self._AssertParseResult([0.66], '.66 kb')
 153     self._AssertParseResult([], '. kb')
 154     self._AssertParseResult([], 'aaa kb')
 155
 156   def testTryParseResultValuesFromOutput_WithMultiValue(self):
 157     """Tests result pattern <*>RESULT <graph>: <trace>= [<value>,<value>, ..]"""
 158     self._AssertParseResult([66.88], '[66.88] kb')
 159     self._AssertParseResult([66.88, 99.44], '[66.88, 99.44]kb')
 160     self._AssertParseResult([66.88, 99.44], '[ 66.88, 99.44 ]')
 161     self._AssertParseResult([-66.88, 99.44], '[-66.88, 99.44] kb')
 162     self._AssertParseResult([-66, 99], '[-66,99] kb')
 163     self._AssertParseResult([-66, 99], '[-66,99,] kb')
 164     self._AssertParseResult([-66, 0.99], '[-66,.99] kb')
 165     self._AssertParseResult([], '[] kb')
 166     self._AssertParseResult([], '[-66,abc] kb')
 167
 168   def testTryParseResultValuesFromOutputWithMeanStd(self):
 169     """Tests result pattern <*>RESULT <graph>: <trace>= {<mean, std}"""
 170     self._AssertParseResult([33.22], '{33.22, 3.6} kb')
 171     self._AssertParseResult([33.22], '{33.22, 3.6} kb')
 172     self._AssertParseResult([33.22], '{33.22,3.6}kb')
 173     self._AssertParseResult([33.22], '{33.22,3.6} kb')
 174     self._AssertParseResult([33.22], '{ 33.22,3.6 }kb')
 175     self._AssertParseResult([-33.22], '{-33.22,3.6}kb')
 176     self._AssertParseResult([22], '{22,6}kb')
 177     self._AssertParseResult([.22], '{.22,6}kb')
 178     self._AssertParseResult([], '{.22,6, 44}kb')
 179     self._AssertParseResult([], '{}kb')
 180     self._AssertParseResult([], '{XYZ}kb')
 181
 182   def _AssertCompatibleCommand(
 183       self, expected_command, original_command, revision, target_platform):
 184     """Tests the modification of the command that might be done.
 185
 186     This modification to the command is done in order to get a Telemetry
 187     command that works; before some revisions, the browser name that Telemetry
 188     expects is different in some cases, but we want it to work anyway.
 189
 190     Specifically, only for android:
 191       After r276628, only android-chrome-shell works.
 192       Prior to r274857, only android-chromium-testshell works.
 193       In the range [274857, 276628], both work.
 194     """
 195     bisect_options = bisect_perf_module.BisectOptions()
 196     bisect_options.output_buildbot_annotations = None
 197     source_control = source_control_module.DetermineAndCreateSourceControl(
 198         bisect_options)
 199     bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
 200         source_control, bisect_options)
 201     bisect_instance.opts.target_platform = target_platform
 202     git_revision = bisect_instance.source_control.ResolveToRevision(
 203         revision, 'chromium', bisect_perf_module.DEPOT_DEPS_NAME, 100)
 204     depot = 'chromium'
 205     command = bisect_instance.GetCompatibleCommand(
 206         original_command, git_revision, depot)
 207     self.assertEqual(expected_command, command)
 208
 209   def testGetCompatibleCommand_ChangeToTestShell(self):
 210     # For revisions <= r274857, only android-chromium-testshell is used.
 211     self._AssertCompatibleCommand(
 212         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 213         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 214         274857, 'android')
 215
 216   def testGetCompatibleCommand_ChangeToShell(self):
 217     # For revisions >= r276728, only android-chrome-shell can be used.
 218     self._AssertCompatibleCommand(
 219         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 220         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 221         276628, 'android')
 222
 223   def testGetCompatibleCommand_NoChange(self):
 224     # For revisions < r276728, android-chromium-testshell can be used.
 225     self._AssertCompatibleCommand(
 226         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 227         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 228         274858, 'android')
 229     # For revisions > r274857, android-chrome-shell can be used.
 230     self._AssertCompatibleCommand(
 231         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 232         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 233         274858, 'android')
 234
 235   def testGetCompatibleCommand_NonAndroidPlatform(self):
 236     # In most cases, there's no need to change Telemetry command.
 237     # For revisions >= r276728, only android-chrome-shell can be used.
 238     self._AssertCompatibleCommand(
 239         'tools/perf/run_benchmark -v --browser=release foo',
 240         'tools/perf/run_benchmark -v --browser=release foo',
 241         276628, 'chromium')
 242
 243   # This method doesn't reference self; it fails if an error is thrown.
 244   # pylint: disable=R0201
 245   def testDryRun(self):
 246     """Does a dry run of the bisect script.
 247
 248     This serves as a smoke test to catch errors in the basic execution of the
 249     script.
 250     """
 251     bisect_instance = _GetBisectPerformanceMetricsInstance()
 252     results = bisect_instance.Run(bisect_instance.opts.command,
 253                                   bisect_instance.opts.bad_revision,
 254                                   bisect_instance.opts.good_revision,
 255                                   bisect_instance.opts.metric)
 256     bisect_instance.FormatAndPrintResults(results)
 257
 258   def testGetCommitPosition(self):
 259     bisect_instance = _GetBisectPerformanceMetricsInstance()
 260     cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531'
 261     self.assertEqual(
 262         291765, bisect_instance.source_control.GetCommitPosition(cp_git_rev))
 263
 264     svn_git_rev = 'e6db23a037cad47299a94b155b95eebd1ee61a58'
 265     self.assertEqual(
 266         291467, bisect_instance.source_control.GetCommitPosition(svn_git_rev))
 267
 268   def testGetCommitPositionForV8(self):
 269     bisect_instance = _GetBisectPerformanceMetricsInstance()
 270     v8_rev = '21d700eedcdd6570eff22ece724b63a5eefe78cb'
 271     depot_path = os.path.join(bisect_instance.src_cwd, 'src', 'v8')
 272     self.assertEqual(
 273         23634,
 274         bisect_instance.source_control.GetCommitPosition(v8_rev, depot_path))
 275
 276   def testGetCommitPositionForWebKit(self):
 277     bisect_instance = _GetBisectPerformanceMetricsInstance()
 278     wk_rev = 'a94d028e0f2c77f159b3dac95eb90c3b4cf48c61'
 279     depot_path = os.path.join(bisect_instance.src_cwd, 'src', 'third_party',
 280                               'WebKit')
 281     self.assertEqual(
 282         181660,
 283         bisect_instance.source_control.GetCommitPosition(wk_rev, depot_path))
 284
 285   def testUpdateDepsContent(self):
 286     bisect_instance = _GetBisectPerformanceMetricsInstance()
 287     deps_file = 'DEPS'
 288     # We are intentionally reading DEPS file contents instead of string literal
 289     # with few lines from DEPS because to check if the format we are expecting
 290     # to search is not changed in DEPS content.
 291     # TODO (prasadv): Add a separate test to validate the DEPS contents with the
 292     # format that bisect script expects.
 293     deps_contents = bisect_perf_module.ReadStringFromFile(deps_file)
 294     deps_key = 'v8_revision'
 295     depot = 'v8'
 296     git_revision = 'a12345789a23456789a123456789a123456789'
 297     updated_content = bisect_instance.UpdateDepsContents(
 298         deps_contents, depot, git_revision, deps_key)
 299     self.assertIsNotNone(updated_content)
 300     ss = re.compile('["\']%s["\']: ["\']%s["\']' % (deps_key, git_revision))
 301     self.assertIsNotNone(re.search(ss, updated_content))
 302
 303
# Run the tests in this file when it is executed as a script.
if __name__ == '__main__':
  unittest.main()