# Revert of Removed the ScriptedSpeech content side. (patchset #2 id:20001 of https...
# [chromium-blink-merge.git] / tools / bisect-perf-regression_test.py
# blob b91179216293505e5d893d6fa4c0c8a238324a94
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import re
import unittest

from auto_bisect import source_control as source_control_module

# Special import necessary because filename contains dash characters.
bisect_perf_module = __import__('bisect-perf-regression')


def _GetBisectPerformanceMetricsInstance():
  """Returns an instance of the BisectPerformanceMetrics class.

  The instance is built from a fixed dictionary of fake options (fake command
  and metric, good revision 280000, bad revision 280005) with all three
  debug_ignore_* flags enabled, and its src_cwd is set to the parent of the
  directory that contains this file.

  Returns:
    A bisect_perf_module.BisectPerformanceMetrics instance.
  """
  options_dict = {
      # NOTE(review): presumably these flags make the bisect skip the real
      # build, sync and perf-test steps -- confirm against BisectOptions.
      'debug_ignore_build': True,
      'debug_ignore_sync': True,
      'debug_ignore_perf_test': True,
      'command': 'fake_command',
      'metric': 'fake/metric',
      'good_revision': 280000,
      'bad_revision': 280005,
  }
  bisect_options = bisect_perf_module.BisectOptions.FromDict(options_dict)
  source_control = source_control_module.DetermineAndCreateSourceControl(
      bisect_options)
  bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
      source_control, bisect_options)
  # Anchor the working directory relative to this file rather than to the
  # process's current directory.
  bisect_instance.src_cwd = os.path.abspath(
      os.path.join(os.path.dirname(__file__), os.path.pardir))
  return bisect_instance


class BisectPerfRegressionTest(unittest.TestCase):
  """Test case for other functions and classes in bisect-perf-regression.py."""

  def _AssertConfidence(self, score, bad_values, good_values):
    """Checks whether the given sets of values have a given confidence score.

    The score represents our confidence that the two sets of values wouldn't
    be as different as they are just by chance; that is, that some real change
    occurred between the two sets of values.

    Args:
      score: Expected confidence score.
      bad_values: First list of numbers.
      good_values: Second list of numbers.
    """
    # ConfidenceScore takes a list of lists but these lists are flattened
    # inside the function.
    confidence = bisect_perf_module.ConfidenceScore(
        [[v] for v in bad_values],
        [[v] for v in good_values])
    self.assertEqual(score, confidence)

  def testConfidenceScore_ZeroConfidence(self):
    # The good and bad sets contain the same values, so the confidence that
    # they're different should be zero.
    self._AssertConfidence(0.0, [4, 5, 7, 6, 8, 7], [8, 7, 6, 7, 5, 4])

  def testConfidenceScore_MediumConfidence(self):
    self._AssertConfidence(80.0, [0, 1, 1, 1, 2, 2], [1, 1, 1, 3, 3, 4])

  def testConfidenceScore_HighConfidence(self):
    self._AssertConfidence(95.0, [0, 1, 1, 1, 2, 2], [1, 2, 2, 3, 3, 4])

  def testConfidenceScore_VeryHighConfidence(self):
    # Confidence is high if the two sets of values have no internal variance.
    self._AssertConfidence(99.9, [1, 1, 1, 1], [1.2, 1.2, 1.2, 1.2])
    self._AssertConfidence(99.9, [1, 1, 1, 1], [1.01, 1.01, 1.01, 1.01])

  def testConfidenceScore_UnbalancedSampleSize(self):
    # The second set of numbers only contains one number, so confidence is 0.
    self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2], [1.4])

  def testConfidenceScore_EmptySample(self):
    # Confidence is zero if either or both samples are empty.
    self._AssertConfidence(0.0, [], [])
    self._AssertConfidence(0.0, [], [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3])
    self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [])

  def testConfidenceScore_FunctionalTestResults(self):
    self._AssertConfidence(80.0, [1, 1, 0, 1, 1, 1, 0, 1], [0, 0, 1, 0, 1, 0])
    self._AssertConfidence(99.9, [1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0])

  def testConfidenceScore_RealWorldCases(self):
    """This method contains a set of data from actual bisect results.

    The confidence scores asserted below were all copied from the actual
    results, so the purpose of this test method is mainly to show what the
    results for real cases are, and compare when we change the confidence
    score function in the future.
    """
    self._AssertConfidence(80, [133, 130, 132, 132, 130, 129], [129, 129, 125])
    self._AssertConfidence(99.5, [668, 667], [498, 498, 499])
    self._AssertConfidence(80, [67, 68], [65, 65, 67])
    self._AssertConfidence(0, [514], [514])
    self._AssertConfidence(90, [616, 613, 607, 615], [617, 619, 619, 617])
    self._AssertConfidence(0, [3.5, 5.8, 4.7, 3.5, 3.6], [2.8])
    self._AssertConfidence(90, [3, 3, 3], [2, 2, 2, 3])
    self._AssertConfidence(0, [1999004, 1999627], [223355])
    self._AssertConfidence(90, [1040, 934, 961], [876, 875, 789])
    self._AssertConfidence(90, [309, 305, 304], [302, 302, 299, 303, 298])

  def testParseDEPSStringManually(self):
    """Tests DEPS parsing."""
    # NOTE(review): the leading whitespace inside this literal is
    # reconstructed; presumably the parser does not depend on it -- confirm.
    deps_file_contents = """
    vars = {
        'ffmpeg_hash':
             '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
        'webkit_url':
             'https://chromium.googlesource.com/chromium/blink.git',
        'git_url':
             'https://chromium.googlesource.com',
        'webkit_rev':
             '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
        'angle_revision':
             '74697cf2064c0a2c0d7e1b1b28db439286766a05'
    }"""

    # Should only expect SVN/git revisions to come through, and URLs should be
    # filtered out.
    expected_vars_dict = {
        'ffmpeg_hash': '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
        'webkit_rev': '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
        'angle_revision': '74697cf2064c0a2c0d7e1b1b28db439286766a05'
    }
    # Testing private function.
    # pylint: disable=W0212
    vars_dict = bisect_perf_module._ParseRevisionsFromDEPSFileManually(
        deps_file_contents)
    self.assertEqual(vars_dict, expected_vars_dict)

  def _AssertParseResult(self, expected_values, result_string):
    """Asserts some values are parsed from a RESULT line.

    The result string is substituted into the "my_chart: my_trace" line of a
    two-line output; the other line belongs to a different chart/trace and
    must not contribute any values.

    Args:
      expected_values: List of numbers expected to be parsed.
      result_string: Text placed after "my_trace= " in the RESULT line.
    """
    results_template = ('RESULT other_chart: other_trace= 123 count\n'
                        'RESULT my_chart: my_trace= %(value)s\n')
    results = results_template % {'value': result_string}
    metric = ['my_chart', 'my_trace']
    # Testing private function.
    # pylint: disable=W0212
    values = bisect_perf_module._TryParseResultValuesFromOutput(metric, results)
    self.assertEqual(expected_values, values)

  def testTryParseResultValuesFromOutput_WithSingleValue(self):
    """Tests result pattern <*>RESULT <graph>: <trace>= <value>"""
    self._AssertParseResult([66.88], '66.88 kb')
    self._AssertParseResult([66.88], '66.88 ')
    self._AssertParseResult([-66.88], '-66.88 kb')
    self._AssertParseResult([66], '66 kb')
    self._AssertParseResult([0.66], '.66 kb')
    self._AssertParseResult([], '. kb')
    self._AssertParseResult([], 'aaa kb')

  def testTryParseResultValuesFromOutput_WithMultiValue(self):
    """Tests result pattern <*>RESULT <graph>: <trace>= [<value>,<value>, ..]"""
    self._AssertParseResult([66.88], '[66.88] kb')
    self._AssertParseResult([66.88, 99.44], '[66.88, 99.44]kb')
    self._AssertParseResult([66.88, 99.44], '[ 66.88, 99.44 ]')
    self._AssertParseResult([-66.88, 99.44], '[-66.88, 99.44] kb')
    self._AssertParseResult([-66, 99], '[-66,99] kb')
    self._AssertParseResult([-66, 99], '[-66,99,] kb')
    self._AssertParseResult([-66, 0.99], '[-66,.99] kb')
    self._AssertParseResult([], '[] kb')
    self._AssertParseResult([], '[-66,abc] kb')

  def testTryParseResultValuesFromOutputWithMeanStd(self):
    """Tests result pattern <*>RESULT <graph>: <trace>= {<mean>, <std>}"""
    self._AssertParseResult([33.22], '{33.22, 3.6} kb')
    self._AssertParseResult([33.22], '{33.22, 3.6} kb')
    self._AssertParseResult([33.22], '{33.22,3.6}kb')
    self._AssertParseResult([33.22], '{33.22,3.6} kb')
    self._AssertParseResult([33.22], '{ 33.22,3.6 }kb')
    self._AssertParseResult([-33.22], '{-33.22,3.6}kb')
    self._AssertParseResult([22], '{22,6}kb')
    self._AssertParseResult([.22], '{.22,6}kb')
    self._AssertParseResult([], '{.22,6, 44}kb')
    self._AssertParseResult([], '{}kb')
    self._AssertParseResult([], '{XYZ}kb')

  def _AssertCompatibleCommand(
      self, expected_command, original_command, revision, target_platform):
    """Tests the modification of the command that might be done.

    This modification to the command is done in order to get a Telemetry
    command that works; before some revisions, the browser name that Telemetry
    expects is different in some cases, but we want it to work anyway.

    Specifically, only for android (boundaries as asserted by the tests in
    this class):
      At or after r276628, only android-chrome-shell works.
      At or before r274857, only android-chromium-testshell works.
      Between those two revisions, both work.

    Args:
      expected_command: Command expected back from GetCompatibleCommand.
      original_command: Command passed in to GetCompatibleCommand.
      revision: Revision number, resolved to a git revision before use.
      target_platform: Value assigned to the bisect options' target_platform.
    """
    bisect_options = bisect_perf_module.BisectOptions()
    bisect_options.output_buildbot_annotations = None
    source_control = source_control_module.DetermineAndCreateSourceControl(
        bisect_options)
    bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
        source_control, bisect_options)
    bisect_instance.opts.target_platform = target_platform
    git_revision = bisect_instance.source_control.ResolveToRevision(
        revision, 'chromium', bisect_perf_module.DEPOT_DEPS_NAME, 100)
    depot = 'chromium'
    command = bisect_instance.GetCompatibleCommand(
        original_command, git_revision, depot)
    self.assertEqual(expected_command, command)

  def testGetCompatibleCommand_ChangeToTestShell(self):
    # For revisions <= r274857, only android-chromium-testshell is used.
    self._AssertCompatibleCommand(
        'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
        'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
        274857, 'android')

  def testGetCompatibleCommand_ChangeToShell(self):
    # For revisions >= r276628, only android-chrome-shell can be used.
    self._AssertCompatibleCommand(
        'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
        'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
        276628, 'android')

  def testGetCompatibleCommand_NoChange(self):
    # For revisions < r276628, android-chromium-testshell can be used.
    self._AssertCompatibleCommand(
        'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
        'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
        274858, 'android')
    # For revisions > r274857, android-chrome-shell can be used.
    self._AssertCompatibleCommand(
        'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
        'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
        274858, 'android')

  def testGetCompatibleCommand_NonAndroidPlatform(self):
    # In most cases, there's no need to change Telemetry command.
    # Even at r276628, where an android command would be rewritten, a command
    # for a non-android target platform is expected to come back unchanged.
    self._AssertCompatibleCommand(
        'tools/perf/run_benchmark -v --browser=release foo',
        'tools/perf/run_benchmark -v --browser=release foo',
        276628, 'chromium')

  # This method doesn't reference self; it fails if an error is thrown.
  # pylint: disable=R0201
  def testDryRun(self):
    """Does a dry run of the bisect script.

    This serves as a smoke test to catch errors in the basic execution of the
    script.
    """
    bisect_instance = _GetBisectPerformanceMetricsInstance()
    results = bisect_instance.Run(bisect_instance.opts.command,
                                  bisect_instance.opts.bad_revision,
                                  bisect_instance.opts.good_revision,
                                  bisect_instance.opts.metric)
    bisect_instance.FormatAndPrintResults(results)

  # NOTE(review): the GetCommitPosition tests below look up hard-coded git
  # hashes, so they presumably need a real Chromium checkout -- confirm.
  def testGetCommitPosition(self):
    bisect_instance = _GetBisectPerformanceMetricsInstance()
    cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531'
    self.assertEqual(
        291765, bisect_instance.source_control.GetCommitPosition(cp_git_rev))

    svn_git_rev = 'e6db23a037cad47299a94b155b95eebd1ee61a58'
    self.assertEqual(
        291467, bisect_instance.source_control.GetCommitPosition(svn_git_rev))

  def testGetCommitPositionForV8(self):
    bisect_instance = _GetBisectPerformanceMetricsInstance()
    v8_rev = '21d700eedcdd6570eff22ece724b63a5eefe78cb'
    depot_path = os.path.join(bisect_instance.src_cwd, 'src', 'v8')
    self.assertEqual(
        23634,
        bisect_instance.source_control.GetCommitPosition(v8_rev, depot_path))

  def testGetCommitPositionForWebKit(self):
    bisect_instance = _GetBisectPerformanceMetricsInstance()
    wk_rev = 'a94d028e0f2c77f159b3dac95eb90c3b4cf48c61'
    depot_path = os.path.join(bisect_instance.src_cwd, 'src', 'third_party',
                              'WebKit')
    self.assertEqual(
        181660,
        bisect_instance.source_control.GetCommitPosition(wk_rev, depot_path))

  def testUpdateDepsContent(self):
    bisect_instance = _GetBisectPerformanceMetricsInstance()
    deps_file = 'DEPS'
    # We intentionally read the real DEPS file rather than a string literal
    # holding a few DEPS lines, so that this test notices if the format the
    # bisect script searches for changes in the DEPS content.
    # TODO (prasadv): Add a separate test to validate the DEPS contents with the
    # format that bisect script expects.
    deps_contents = bisect_perf_module.ReadStringFromFile(deps_file)
    deps_key = 'v8_revision'
    depot = 'v8'
    git_revision = 'a12345789a23456789a123456789a123456789'
    updated_content = bisect_instance.UpdateDepsContents(
        deps_contents, depot, git_revision, deps_key)
    self.assertIsNotNone(updated_content)
    # The updated contents must hold a quoted key/value entry (either quote
    # style) mapping the deps key to the new revision.
    ss = re.compile('["\']%s["\']: ["\']%s["\']' % (deps_key, git_revision))
    self.assertIsNotNone(re.search(ss, updated_content))


# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()