# Copyright (C) 2015-2022 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
"""Compare two benchmark results

Given two benchmark result files and a threshold, this script compares the
benchmark results and flags differences in performance beyond a given
threshold.
"""
27 import import_bench
as bench
def do_compare(func, var, tl1, tl2, par, threshold):
    """Compare one of the aggregate measurements

    Helper function to compare one of the aggregate measurements of a function
    variant.  The relative change is computed as a percentage of the first
    value; a line is printed only when it is strictly above the threshold.

    Args:
        func: Function name
        var: Function variant name
        tl1: The first timings list (mapping from aggregate name to value)
        tl2: The second timings list (mapping from aggregate name to value)
        par: The aggregate to measure
        threshold: The threshold for differences, beyond which the script should
        print a warning.

    Returns:
        None.  A missing aggregate is reported on stderr; a zero baseline is
        skipped silently because no percentage can be computed for it.
    """
    key = str(par)

    # Only the lookups can raise KeyError, so keep them in their own try.
    try:
        v1 = tl1[key]
        v2 = tl2[key]
    except KeyError:
        sys.stderr.write('%s(%s)[%s]: stat does not exist\n' % (func, var, par))
        return

    try:
        d = abs(v2 - v1) * 100 / v1
    except ZeroDivisionError:
        return

    if d > threshold:
        # NOTE(review): the marker assignment was missing from the damaged
        # source; '+++' for a decrease and '---' for an increase is restored
        # here -- confirm against the upstream script.
        ind = '+++' if v1 > v2 else '---'
        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
              (ind, func, var, par, d, v1, v2))
def compare_runs(pts1, pts2, threshold, stats):
    """Compare two benchmark runs

    For every function variant in the first run, compare the requested
    aggregate statistics and then, when both runs carry detailed timings,
    compare those timings pairwise.

    Args:
        pts1: Timing data from first machine
        pts2: Timing data from second machine
        threshold: Percentage difference above which a change is reported
        stats: Space separated list of aggregate statistics to compare
    """
    # XXX We assume that the two benchmarks have identical functions and
    # variants.  We cannot compare two benchmarks that may have different
    # functions or variants.  Maybe that is something for the future.
    for func, variants in pts1['functions'].items():
        for var, tl1 in variants.items():
            tl2 = pts2['functions'][func][var]

            # Compare the consolidated numbers
            for stat in stats.split():
                do_compare(func, var, tl1, tl2, stat, threshold)

            # Skip over to the next variant or function if there is no detailed
            # timing info for the function variant.
            if 'timings' not in tl1 or 'timings' not in tl2:
                continue

            before = tl1['timings']
            after = tl2['timings']

            # If two lists do not have the same length then it is likely that
            # the performance characteristics of the function have changed.
            # XXX: It is also likely that there was some measurement that
            # strayed outside the usual range.  Such outliers should not
            # happen on an idle machine with identical hardware and
            # configuration, but ideal environments are hard to come by.
            if len(before) != len(after):
                print('* %s(%s): Timing characteristics changed' %
                      (func, var))
                print('\tBefore: [%s]' % ', '.join(str(x) for x in before))
                print('\tAfter: [%s]' % ', '.join(str(x) for x in after))
                continue

            # Report the pairs whose difference crosses the threshold we
            # have set.
            for t1, t2 in zip(before, after):
                # A zero baseline has no meaningful percentage change; skip it
                # (as do_compare does) instead of aborting the whole run.
                if t1 == 0:
                    continue

                d = abs(t2 - t1) * 100 / t1
                if d > threshold:
                    # NOTE(review): the marker assignment was missing from the
                    # damaged source; '-' for a larger second timing and '+'
                    # otherwise is restored here -- confirm against upstream.
                    ind = '-' if t2 > t1 else '+'
                    print("%s %s(%s): (%.2lf%%) from %g to %g" %
                          (ind, func, var, d, t1, t2))
def _scatter_timings(timings, color):
    """Scatter-plot one run's timings against their index in the given color."""
    length = len(timings)
    xs = [float(i) for i in range(length)]
    # NOTE(review): the marker-size argument was missing from the damaged
    # source; a size that shrinks as the number of points grows is restored
    # here -- confirm against the upstream script.
    points = pylab.scatter(xs, timings, 1.5 + 100 / length)
    pylab.setp(points, 'color', color)


def plot_graphs(bench1, bench2):
    """Plot graphs for functions

    Make scatter plots for the functions and their variants.  Each variant
    gets one PNG in the current directory, with the first run drawn in red
    and the second in green.  Variants without detailed timings in either
    run are skipped with a note on stderr.

    Args:
        bench1: Set of points from the first machine
        bench2: Set of points from the second machine.
    """
    for func, variants in bench1['functions'].items():
        for var, first in variants.items():
            second = bench2['functions'].get(func, {}).get(var, {})

            # No point trying to print a graph if there are no detailed
            # timings.  Both runs are checked (as compare_runs does), and an
            # empty list is treated like a missing one since it has no points.
            if not first.get('timings') or not second.get('timings'):
                sys.stderr.write('Skipping graph for %s(%s)\n' % (func, var))
                continue

            # NOTE(review): resetting the figure per variant was restored from
            # a missing line -- confirm against the upstream script.
            pylab.clf()
            pylab.ylabel('Time (cycles)')

            # First set of points
            _scatter_timings(first['timings'], 'r')

            # Second set of points
            _scatter_timings(second['timings'], 'g')

            if var:
                filename = "%s-%s.png" % (func, var)
            else:
                filename = "%s.png" % func
            # Terminate the message so consecutive notices do not run together.
            sys.stderr.write('Writing out %s\n' % filename)
            pylab.savefig(filename)
def main(bench1, bench2, schema, threshold, stats):
    """Load two benchmark outputs, plot them and report the differences.

    Args:
        bench1: First benchmark input, handed to bench.parse_bench
        bench2: Second benchmark input, handed to bench.parse_bench
        schema: Schema handed to bench.parse_bench for both inputs
        threshold: Percentage threshold forwarded to compare_runs
        stats: Space separated statistic names forwarded to compare_runs
    """
    def sort_timings(data, func_name, variant):
        # In-place sort of one variant's detailed timings.
        data['functions'][func_name][variant]['timings'].sort()

    first_run = bench.parse_bench(bench1, schema)
    bench.do_for_all_timings(first_run, sort_timings)

    second_run = bench.parse_bench(bench2, schema)
    bench.do_for_all_timings(second_run, sort_timings)

    # Graphs are drawn before the timings are compressed.
    plot_graphs(first_run, second_run)

    for run in (first_run, second_run):
        bench.compress_timings(run)

    compare_runs(first_run, second_run, threshold, stats)
if __name__ == '__main__':
    # The default schema file sits in the same directory as this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    default_schema = os.path.join(script_dir, 'benchout.schema.json')

    parser = argparse.ArgumentParser(
        description='Take two benchmark and compare their timings.')

    # Required parameters
    parser.add_argument('bench1', help='First bench to compare')
    parser.add_argument('bench2', help='Second bench to compare')

    # Optional parameters
    parser.add_argument(
        '--schema',
        default=default_schema,
        help='JSON file to validate source/dest files (default: %(default)s)')
    parser.add_argument(
        '--threshold',
        default=10.0,
        type=float,
        help='Only print those with equal or higher threshold (default: %(default)s)')
    parser.add_argument(
        '--stats',
        default='min mean',
        type=str,
        help='Only consider values from the statistics specified as a space separated list (default: %(default)s)')

    args = parser.parse_args()

    main(args.bench1, args.bench2, args.schema, args.threshold, args.stats)