benchtests/scripts/compare_bench.py

   1 #!/usr/bin/python
   2 # Copyright (C) 2015-2016 Free Software Foundation, Inc.
   3 # This file is part of the GNU C Library.
   4 #
   5 # The GNU C Library is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU Lesser General Public
   7 # License as published by the Free Software Foundation; either
   8 # version 2.1 of the License, or (at your option) any later version.
   9 #
  10 # The GNU C Library is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 # Lesser General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU Lesser General Public
  16 # License along with the GNU C Library; if not, see
  17 # <http://www.gnu.org/licenses/>.
  18 """Compare two benchmark results
  19
  20 Given two benchmark result files and a threshold, this script compares the
  21 benchmark results and flags differences in performance beyond a given
  22 threshold.
  23 """
  24 import sys
  25 import os
  26 import pylab
  27 import import_bench as bench
  28
  29 def do_compare(func, var, tl1, tl2, par, threshold):
  30     """Compare one of the aggregate measurements
  31
  32     Helper function to compare one of the aggregate measurements of a function
  33     variant.
  34
  35     Args:
  36         func: Function name
  37         var: Function variant name
  38         tl1: The first timings list
  39         tl2: The second timings list
  40         par: The aggregate to measure
  41         threshold: The threshold for differences, beyond which the script should
  42         print a warning.
  43     """
  44     d = abs(tl2[par] - tl1[par]) * 100 / tl1[str(par)]
  45     if d > threshold:
  46         if tl1[par] > tl2[par]:
  47             ind = '+++'
  48         else:
  49             ind = '---'
  50         print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
  51                 (ind, func, var, par, d, tl1[par], tl2[par]))
  52
  53
  54 def compare_runs(pts1, pts2, threshold):
  55     """Compare two benchmark runs
  56
  57     Args:
  58         pts1: Timing data from first machine
  59         pts2: Timing data from second machine
  60     """
  61
  62     # XXX We assume that the two benchmarks have identical functions and
  63     # variants.  We cannot compare two benchmarks that may have different
  64     # functions or variants.  Maybe that is something for the future.
  65     for func in pts1['functions'].keys():
  66         for var in pts1['functions'][func].keys():
  67             tl1 = pts1['functions'][func][var]
  68             tl2 = pts2['functions'][func][var]
  69
  70             # Compare the consolidated numbers
  71             # do_compare(func, var, tl1, tl2, 'max', threshold)
  72             do_compare(func, var, tl1, tl2, 'min', threshold)
  73             do_compare(func, var, tl1, tl2, 'mean', threshold)
  74
  75             # Skip over to the next variant or function if there is no detailed
  76             # timing info for the function variant.
  77             if 'timings' not in pts1['functions'][func][var].keys() or \
  78                 'timings' not in pts2['functions'][func][var].keys():
  79                     return
  80
  81             # If two lists do not have the same length then it is likely that
  82             # the performance characteristics of the function have changed.
  83             # XXX: It is also likely that there was some measurement that
  84             # strayed outside the usual range.  Such ouiers should not
  85             # happen on an idle machine with identical hardware and
  86             # configuration, but ideal environments are hard to come by.
  87             if len(tl1['timings']) != len(tl2['timings']):
  88                 print('* %s(%s): Timing characteristics changed' %
  89                         (func, var))
  90                 print('\tBefore: [%s]' %
  91                         ', '.join([str(x) for x in tl1['timings']]))
  92                 print('\tAfter: [%s]' %
  93                         ', '.join([str(x) for x in tl2['timings']]))
  94                 continue
  95
  96             # Collect numbers whose differences cross the threshold we have
  97             # set.
  98             issues = [(x, y) for x, y in zip(tl1['timings'], tl2['timings']) \
  99                         if abs(y - x) * 100 / x > threshold]
 100
 101             # Now print them.
 102             for t1, t2 in issues:
 103                 d = abs(t2 - t1) * 100 / t1
 104                 if t2 > t1:
 105                     ind = '-'
 106                 else:
 107                     ind = '+'
 108
 109                 print("%s %s(%s): (%.2lf%%) from %g to %g" %
 110                         (ind, func, var, d, t1, t2))
 111
 112
 113 def plot_graphs(bench1, bench2):
 114     """Plot graphs for functions
 115
 116     Make scatter plots for the functions and their variants.
 117
 118     Args:
 119         bench1: Set of points from the first machine
 120         bench2: Set of points from the second machine.
 121     """
 122     for func in bench1['functions'].keys():
 123         for var in bench1['functions'][func].keys():
 124             # No point trying to print a graph if there are no detailed
 125             # timings.
 126             if u'timings' not in bench1['functions'][func][var].keys():
 127                 print('Skipping graph for %s(%s)' % (func, var))
 128                 continue
 129
 130             pylab.clf()
 131             pylab.ylabel('Time (cycles)')
 132
 133             # First set of points
 134             length = len(bench1['functions'][func][var]['timings'])
 135             X = [float(x) for x in range(length)]
 136             lines = pylab.scatter(X, bench1['functions'][func][var]['timings'],
 137                     1.5 + 100 / length)
 138             pylab.setp(lines, 'color', 'r')
 139
 140             # Second set of points
 141             length = len(bench2['functions'][func][var]['timings'])
 142             X = [float(x) for x in range(length)]
 143             lines = pylab.scatter(X, bench2['functions'][func][var]['timings'],
 144                     1.5 + 100 / length)
 145             pylab.setp(lines, 'color', 'g')
 146
 147             if var:
 148                 filename = "%s-%s.png" % (func, var)
 149             else:
 150                 filename = "%s.png" % func
 151             print('Writing out %s' % filename)
 152             pylab.savefig(filename)
 153
 154
 155 def main(args):
 156     """Program Entry Point
 157
 158     Take two benchmark output files and compare their timings.
 159     """
 160     if len(args) > 4 or len(args) < 3:
 161         print('Usage: %s <schema> <file1> <file2> [threshold in %%]' % sys.argv[0])
 162         sys.exit(os.EX_USAGE)
 163
 164     bench1 = bench.parse_bench(args[1], args[0])
 165     bench2 = bench.parse_bench(args[2], args[0])
 166     if len(args) == 4:
 167         threshold = float(args[3])
 168     else:
 169         threshold = 10.0
 170
 171     if (bench1['timing_type'] != bench2['timing_type']):
 172         print('Cannot compare benchmark outputs: timing types are different')
 173         return
 174
 175     plot_graphs(bench1, bench2)
 176
 177     bench.compress_timings(bench1)
 178     bench.compress_timings(bench2)
 179
 180     compare_runs(bench1, bench2, threshold)
 181
 182
 183 if __name__ == '__main__':
 184     main(sys.argv[1:])