Adjust name of ld.so in test-container.c.
[glibc.git] / benchtests / scripts / compare_bench.py
blob88e8911d812f463aeadd041f29ab8b7cbedd7e89
1 #!/usr/bin/python
2 # Copyright (C) 2015-2018 Free Software Foundation, Inc.
3 # This file is part of the GNU C Library.
5 # The GNU C Library is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License, or (at your option) any later version.
10 # The GNU C Library is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with the GNU C Library; if not, see
17 # <http://www.gnu.org/licenses/>.
18 """Compare two benchmark results
20 Given two benchmark result files and a threshold, this script compares the
21 benchmark results and flags differences in performance beyond a given
22 threshold.
23 """
24 import sys
25 import os
26 import pylab
27 import import_bench as bench
28 import argparse
def do_compare(func, var, tl1, tl2, par, threshold):
    """Compare one of the aggregate measurements

    Helper function to compare one of the aggregate measurements of a function
    variant.  The absolute difference between the two values is expressed as
    a percentage of the first value; when it exceeds the threshold a line is
    printed.  The line starts with '+++' when the second value is lower than
    the first and with '---' otherwise.

    Args:
        func: Function name
        var: Function variant name
        tl1: The first timings list (indexed by the aggregate name)
        tl2: The second timings list (indexed by the aggregate name)
        par: The aggregate to measure
        threshold: The threshold for differences, beyond which the script should
        print a warning.
    """
    before = tl1[par]
    after = tl2[par]

    if before == 0:
        # A percentage of a zero baseline is undefined.  Report "no change"
        # when both values are zero and an unbounded change otherwise, rather
        # than aborting the whole comparison with ZeroDivisionError.
        d = 0.0 if after == 0 else float('inf')
    else:
        d = abs(after - before) * 100 / before

    if d > threshold:
        ind = '+++' if before > after else '---'
        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
              (ind, func, var, par, d, before, after))
def _percent_change(before, after):
    """Return the absolute change from before to after as a percentage of before.

    A zero baseline yields 0 when the value did not change and infinity when
    it did, instead of raising ZeroDivisionError.
    """
    if before == 0:
        return 0.0 if after == 0 else float('inf')
    return abs(after - before) * 100 / before


def compare_runs(pts1, pts2, threshold):
    """Compare two benchmark runs

    For every function variant in the first run, compare the 'min' and 'mean'
    aggregates and then, when both runs carry detailed timings, compare the
    timings pairwise.  Differences beyond the threshold are printed.

    Args:
        pts1: Timing data from first machine
        pts2: Timing data from second machine
        threshold: Percentage difference beyond which a result is reported
    """

    # XXX We assume that the two benchmarks have identical functions and
    # variants.  We cannot compare two benchmarks that may have different
    # functions or variants.  Maybe that is something for the future.
    for func, variants in pts1['functions'].items():
        for var, tl1 in variants.items():
            tl2 = pts2['functions'][func][var]

            # Compare the consolidated numbers
            # do_compare(func, var, tl1, tl2, 'max', threshold)
            do_compare(func, var, tl1, tl2, 'min', threshold)
            do_compare(func, var, tl1, tl2, 'mean', threshold)

            # Skip over to the next variant or function if there is no detailed
            # timing info for the function variant.  This must not return:
            # the remaining variants and functions still need comparing.
            if 'timings' not in tl1 or 'timings' not in tl2:
                continue

            # If two lists do not have the same length then it is likely that
            # the performance characteristics of the function have changed.
            # XXX: It is also likely that there was some measurement that
            # strayed outside the usual range.  Such outliers should not
            # happen on an idle machine with identical hardware and
            # configuration, but ideal environments are hard to come by.
            if len(tl1['timings']) != len(tl2['timings']):
                print('* %s(%s): Timing characteristics changed' %
                      (func, var))
                print('\tBefore: [%s]' %
                      ', '.join([str(x) for x in tl1['timings']]))
                print('\tAfter: [%s]' %
                      ', '.join([str(x) for x in tl2['timings']]))
                continue

            # Report every pair of timings whose difference crosses the
            # threshold we have set.  '-' marks a timing that went up and
            # '+' one that did not.
            for t1, t2 in zip(tl1['timings'], tl2['timings']):
                d = _percent_change(t1, t2)
                if d <= threshold:
                    continue
                ind = '-' if t2 > t1 else '+'
                print("%s %s(%s): (%.2lf%%) from %g to %g" %
                      (ind, func, var, d, t1, t2))
def plot_graphs(bench1, bench2):
    """Plot graphs for functions

    Make scatter plots for the functions and their variants.  Each variant
    with detailed timings in both runs is written to '<func>-<var>.png' (or
    '<func>.png' when the variant name is empty), with the first run drawn in
    red and the second in green.  Variants without detailed timings in either
    run are skipped with a message.

    Args:
        bench1: Set of points from the first machine
        bench2: Set of points from the second machine.
    """
    for func, variants in bench1['functions'].items():
        for var, tl1 in variants.items():
            timings1 = tl1.get('timings')
            timings2 = bench2['functions'].get(func, {}).get(var, {}).get('timings')

            # No point trying to print a graph if there are no detailed
            # timings.  Both runs are checked, and an empty list counts as
            # missing because the marker size below divides by its length.
            if not timings1 or not timings2:
                print('Skipping graph for %s(%s)' % (func, var))
                continue

            pylab.clf()
            pylab.ylabel('Time (cycles)')

            # First set of points in red, second set in green.
            for timings, color in ((timings1, 'r'), (timings2, 'g')):
                length = len(timings)
                X = [float(x) for x in range(length)]
                # The third positional argument shrinks as the number of
                # points grows (presumably the marker size; see the
                # matplotlib scatter documentation).
                lines = pylab.scatter(X, timings, 1.5 + 100 / length)
                pylab.setp(lines, 'color', color)

            if var:
                filename = "%s-%s.png" % (func, var)
            else:
                filename = "%s.png" % func
            print('Writing out %s' % filename)
            pylab.savefig(filename)
def main(bench1, bench2, schema, threshold):
    """Parse, plot and compare two benchmark outputs

    Both inputs are handed to bench.parse_bench together with the schema.
    The graphs are plotted from the parsed data first; the timings are then
    passed through bench.compress_timings before the runs are compared.

    Args:
        bench1: First bench to compare
        bench2: Second bench to compare
        schema: JSON file to validate source/dest files
        threshold: The threshold for differences, passed on to compare_runs
    """
    first_run = bench.parse_bench(bench1, schema)
    second_run = bench.parse_bench(bench2, schema)

    plot_graphs(first_run, second_run)

    for run in (first_run, second_run):
        bench.compress_timings(run)

    compare_runs(first_run, second_run, threshold)
def build_arg_parser():
    """Build the command line parser for this script

    Returns:
        An argparse.ArgumentParser accepting the two benchmark files plus the
        optional --schema and --threshold arguments.
    """
    parser = argparse.ArgumentParser(description='Take two benchmark and compare their timings.')

    # Required parameters
    parser.add_argument('bench1', help='First bench to compare')
    parser.add_argument('bench2', help='Second bench to compare')

    # Optional parameters
    parser.add_argument('--schema',
                        default=os.path.join(os.path.dirname(os.path.realpath(__file__)),'benchout.schema.json'),
                        help='JSON file to validate source/dest files (default: %(default)s)')
    # Without type=float a threshold given on the command line would arrive as
    # a string and be compared against the computed float percentages.
    parser.add_argument('--threshold', default=10.0, type=float,
                        help='Only print those with equal or higher threshold (default: %(default)s)')
    return parser


if __name__ == '__main__':
    args = build_arg_parser().parse_args()

    main(args.bench1, args.bench2, args.schema, args.threshold)