benchtests/scripts/import_bench.py

   1 #!/usr/bin/python
   2 # Copyright (C) 2015-2017 Free Software Foundation, Inc.
   3 # This file is part of the GNU C Library.
   4 #
   5 # The GNU C Library is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU Lesser General Public
   7 # License as published by the Free Software Foundation; either
   8 # version 2.1 of the License, or (at your option) any later version.
   9 #
  10 # The GNU C Library is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 # Lesser General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU Lesser General Public
  16 # License along with the GNU C Library; if not, see
  17 # <http://www.gnu.org/licenses/>.
  18 """Functions to import benchmark data and process it"""
  19
  20 import json
  21 try:
  22     import jsonschema as validator
  23 except ImportError:
  24     print('Could not find jsonschema module.')
  25     raise
  26
  27
  28 def mean(lst):
  29     """Compute and return mean of numbers in a list
  30
  31     The numpy average function has horrible performance, so implement our
  32     own mean function.
  33
  34     Args:
  35         lst: The list of numbers to average.
  36     Return:
  37         The mean of members in the list.
  38     """
  39     return sum(lst) / len(lst)
  40
  41
  42 def split_list(bench, func, var):
  43     """ Split the list into a smaller set of more distinct points
  44
  45     Group together points such that the difference between the smallest
  46     point and the mean is less than 1/3rd of the mean.  This means that
  47     the mean is at most 1.5x the smallest member of that group.
  48
  49     mean - xmin < mean / 3
  50     i.e. 2 * mean / 3 < xmin
  51     i.e. mean < 3 * xmin / 2
  52
  53     For an evenly distributed group, the largest member will be less than
  54     twice the smallest member of the group.
  55     Derivation:
  56
  57     An evenly distributed series would be xmin, xmin + d, xmin + 2d...
  58
  59     mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n
  60     and max element is xmin + (n - 1) * d
  61
  62     Now, mean < 3 * xmin / 2
  63
  64     3 * xmin > 2 * mean
  65     3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
  66     3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
  67     n * xmin > n * (n - 1) * d
  68     xmin > (n - 1) * d
  69     2 * xmin > xmin + (n-1) * d
  70     2 * xmin > xmax
  71
  72     Hence, proved.
  73
  74     Similarly, it is trivial to prove that for a similar aggregation by using
  75     the maximum element, the maximum element in the group must be at most 4/3
  76     times the mean.
  77
  78     Args:
  79         bench: The benchmark object
  80         func: The function name
  81         var: The function variant name
  82     """
  83     means = []
  84     lst = bench['functions'][func][var]['timings']
  85     last = len(lst) - 1
  86     while lst:
  87         for i in range(last + 1):
  88             avg = mean(lst[i:])
  89             if avg > 0.75 * lst[last]:
  90                 means.insert(0, avg)
  91                 lst = lst[:i]
  92                 last = i - 1
  93                 break
  94     bench['functions'][func][var]['timings'] = means
  95
  96
  97 def do_for_all_timings(bench, callback):
  98     """Call a function for all timing objects for each function and its
  99     variants.
 100
 101     Args:
 102         bench: The benchmark object
 103         callback: The callback function
 104     """
 105     for func in bench['functions'].keys():
 106         for k in bench['functions'][func].keys():
 107             if 'timings' not in bench['functions'][func][k].keys():
 108                 continue
 109
 110             callback(bench, func, k)
 111
 112
 113 def compress_timings(points):
 114     """Club points with close enough values into a single mean value
 115
 116     See split_list for details on how the clubbing is done.
 117
 118     Args:
 119         points: The set of points.
 120     """
 121     do_for_all_timings(points, split_list)
 122
 123
 124 def parse_bench(filename, schema_filename):
 125     """Parse the input file
 126
 127     Parse and validate the json file containing the benchmark outputs.  Return
 128     the resulting object.
 129     Args:
 130         filename: Name of the benchmark output file.
 131     Return:
 132         The bench dictionary.
 133     """
 134     with open(schema_filename, 'r') as schemafile:
 135         schema = json.load(schemafile)
 136         with open(filename, 'r') as benchfile:
 137             bench = json.load(benchfile)
 138             validator.validate(bench, schema)
 139             do_for_all_timings(bench, lambda b, f, v:
 140                     b['functions'][f][v]['timings'].sort())
 141             return bench