nptl/tst-thread-exit-clobber: Run with any C++ compiler
[glibc.git] / benchtests / scripts / compare_bench.py
blobea25f778c09bba9d7f7b29139fcd985851155385
1 #!/usr/bin/python
2 # Copyright (C) 2015-2018 Free Software Foundation, Inc.
3 # This file is part of the GNU C Library.
5 # The GNU C Library is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License, or (at your option) any later version.
10 # The GNU C Library is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with the GNU C Library; if not, see
17 # <http://www.gnu.org/licenses/>.
18 """Compare two benchmark results
20 Given two benchmark result files and a threshold, this script compares the
21 benchmark results and flags differences in performance beyond a given
22 threshold.
23 """
24 import sys
25 import os
26 import pylab
27 import import_bench as bench
def do_compare(func, var, tl1, tl2, par, threshold):
    """Compare one of the aggregate measurements

    Helper function to compare one of the aggregate measurements of a function
    variant.  When the relative difference between the two runs exceeds the
    threshold, one line is printed: it is prefixed with '+++' if the first
    value is larger than the second and with '---' otherwise.

    Args:
        func: Function name
        var: Function variant name
        tl1: The first timings list (indexed by PAR)
        tl2: The second timings list (indexed by PAR)
        par: The aggregate to measure
        threshold: The threshold for differences (in percent of the first
            value), beyond which the script should print a warning.
    """
    before = tl1[par]
    after = tl2[par]

    # A percentage relative to a zero baseline is undefined; skip this
    # aggregate instead of aborting the whole run with ZeroDivisionError.
    if before == 0:
        return

    d = abs(after - before) * 100 / before
    if d > threshold:
        ind = '+++' if before > after else '---'
        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
              (ind, func, var, par, d, before, after))
def compare_runs(pts1, pts2, threshold):
    """Compare two benchmark runs

    For every function variant the 'min' and 'mean' aggregates are compared
    and, when both runs carry detailed timings, the individual timings are
    compared pairwise as well.  Differences beyond the threshold are printed.

    Args:
        pts1: Timing data from first machine
        pts2: Timing data from second machine
        threshold: The difference, in percent of the first run's value,
            beyond which a change is reported.
    """

    # XXX We assume that the two benchmarks have identical functions and
    # variants.  We cannot compare two benchmarks that may have different
    # functions or variants.  Maybe that is something for the future.
    for func in pts1['functions']:
        for var in pts1['functions'][func]:
            tl1 = pts1['functions'][func][var]
            tl2 = pts2['functions'][func][var]

            # Compare the consolidated numbers
            # do_compare(func, var, tl1, tl2, 'max', threshold)
            do_compare(func, var, tl1, tl2, 'min', threshold)
            do_compare(func, var, tl1, tl2, 'mean', threshold)

            # Skip over to the next variant or function if there is no detailed
            # timing info for the function variant.  This must not end the
            # whole comparison: later variants may still have timings.
            if 'timings' not in tl1 or 'timings' not in tl2:
                continue

            # If two lists do not have the same length then it is likely that
            # the performance characteristics of the function have changed.
            # XXX: It is also likely that there was some measurement that
            # strayed outside the usual range.  Such outliers should not
            # happen on an idle machine with identical hardware and
            # configuration, but ideal environments are hard to come by.
            if len(tl1['timings']) != len(tl2['timings']):
                print('* %s(%s): Timing characteristics changed' %
                      (func, var))
                print('\tBefore: [%s]' %
                      ', '.join([str(x) for x in tl1['timings']]))
                print('\tAfter: [%s]' %
                      ', '.join([str(x) for x in tl2['timings']]))
                continue

            # Report the numbers whose differences cross the threshold we
            # have set.
            for t1, t2 in zip(tl1['timings'], tl2['timings']):
                # A percentage relative to a zero baseline is undefined;
                # skip the pair rather than raise ZeroDivisionError.
                if t1 == 0:
                    continue

                d = abs(t2 - t1) * 100 / t1
                if d > threshold:
                    ind = '-' if t2 > t1 else '+'
                    print("%s %s(%s): (%.2lf%%) from %g to %g" %
                          (ind, func, var, d, t1, t2))
def _scatter_timings(timings, color):
    """Scatter-plot one list of timings against their index in COLOR."""
    length = len(timings)
    X = [float(x) for x in range(length)]
    # The third argument shrinks towards 1.5 as the number of points grows.
    lines = pylab.scatter(X, timings, 1.5 + 100 / length)
    pylab.setp(lines, 'color', color)


def plot_graphs(bench1, bench2):
    """Plot graphs for functions

    Make scatter plots for the functions and their variants.  The first run
    is drawn in red and the second in green, and each variant is saved to
    '<func>-<var>.png' (or '<func>.png' when the variant name is empty).
    Variants without detailed timings in either run are skipped.

    Args:
        bench1: Set of points from the first machine
        bench2: Set of points from the second machine.
    """
    for func, variants in bench1['functions'].items():
        for var, data1 in variants.items():
            # No point trying to print a graph if there are no detailed
            # timings.  Both runs are plotted, so both must have them;
            # checking only the first run would fail with KeyError below.
            if 'timings' not in data1 or \
                    'timings' not in bench2['functions'][func][var]:
                print('Skipping graph for %s(%s)' % (func, var))
                continue

            pylab.clf()
            pylab.ylabel('Time (cycles)')

            # First set of points
            _scatter_timings(data1['timings'], 'r')

            # Second set of points
            _scatter_timings(bench2['functions'][func][var]['timings'], 'g')

            if var:
                filename = "%s-%s.png" % (func, var)
            else:
                filename = "%s.png" % func
            print('Writing out %s' % filename)
            pylab.savefig(filename)
def main(args):
    """Program Entry Point

    Take two benchmark output files and compare their timings.

    Args:
        args: Command-line arguments without the program name: the schema,
            the two benchmark output files and an optional threshold in
            percent (10.0 when omitted).
    """
    argc = len(args)
    if not 3 <= argc <= 4:
        print('Usage: %s <schema> <file1> <file2> [threshold in %%]' % sys.argv[0])
        sys.exit(os.EX_USAGE)

    schema, file1, file2 = args[:3]
    bench1 = bench.parse_bench(file1, schema)
    bench2 = bench.parse_bench(file2, schema)
    threshold = float(args[3]) if argc == 4 else 10.0

    if bench1['timing_type'] != bench2['timing_type']:
        print('Cannot compare benchmark outputs: timing types are different')
        return

    # The graphs are drawn before the timings are compressed.
    plot_graphs(bench1, bench2)

    for run in (bench1, bench2):
        bench.compress_timings(run)

    compare_runs(bench1, bench2, threshold)
# Run the comparison only when executed as a script; the program name is
# dropped from the argument list before it is handed to main().
if __name__ == "__main__":
    main(sys.argv[1:])