Update copyright dates with scripts/update-copyrights
[glibc.git] / benchtests / scripts / import_bench.py
blob92d9dc4fbaf083827a97eaaef432942ed33229db
1 #!/usr/bin/python
2 # Copyright (C) 2015-2023 Free Software Foundation, Inc.
3 # This file is part of the GNU C Library.
5 # The GNU C Library is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License, or (at your option) any later version.
10 # The GNU C Library is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with the GNU C Library; if not, see
17 # <https://www.gnu.org/licenses/>.
18 """Functions to import benchmark data and process it"""
20 import json
21 try:
22 import jsonschema as validator
23 except ImportError:
24 print('Could not find jsonschema module.')
25 raise
def mean(lst):
    """Return the arithmetic mean of the numbers in a list

    This is a hand-rolled replacement for the numpy average function,
    which has horrible performance.

    Args:
        lst: The list of numbers to average.  It must not be empty, since
             the sum is divided by the number of elements.
    Return:
        The sum of the members divided by their count.
    """
    total = sum(lst)
    count = len(lst)
    return total / count
def split_list(bench, func, var):
    """ Split the list into a smaller set of more distinct points

    Group together points such that the difference between the smallest
    point and the mean is less than 1/3rd of the mean.  This means that
    the mean is at most 1.5x the smallest member of that group.

    mean - xmin < mean / 3
    i.e. 2 * mean / 3 < xmin
    i.e. mean < 3 * xmin / 2

    For an evenly distributed group, the largest member will be less than
    twice the smallest member of the group.
    Derivation:

    An evenly distributed series would be xmin, xmin + d, xmin + 2d...

    mean = (2 * n * xmin + n * (n - 1) * d) / (2 * n)
    and max element is xmin + (n - 1) * d

    Now, mean < 3 * xmin / 2

    3 * xmin > 2 * mean
    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
    n * xmin > n * (n - 1) * d
    xmin > (n - 1) * d
    2 * xmin > xmin + (n-1) * d
    2 * xmin > xmax

    Hence, proved.

    Similarly, it is trivial to prove that for a similar aggregation by using
    the maximum element, the maximum element in the group must be at most 4/3
    times the mean.

    The implementation uses the maximum-element form: working from the end
    of the list, it takes the longest suffix whose mean exceeds 0.75 times
    the last element, records that mean and repeats on what is left.  The
    last element is treated as the largest one, which presumably relies on
    the timings being sorted in ascending order (verify against callers).

    The 'timings' entry of the variant is replaced by the list of group
    means, in the same order as the groups appear in the input.  The input
    list object itself is not modified.

    Args:
        bench: The benchmark object
        func: The function name
        var: The function variant name
    """
    means = []
    lst = bench['functions'][func][var]['timings']
    while lst:
        threshold = 0.75 * lst[-1]
        for start in range(len(lst)):
            tail = lst[start:]
            avg = sum(tail) / len(tail)
            if avg > threshold:
                break
        else:
            # No suffix qualified, not even the last element on its own.
            # This happens e.g. when the largest remaining timing is zero
            # or negative.  Emit that element as a group of one so that the
            # list always shrinks; otherwise this loop would never end.
            start = len(lst) - 1
            avg = lst[start]
        # Groups are discovered from the largest values downwards, so
        # prepend to keep the means in input order.
        means.insert(0, avg)
        lst = lst[:start]
    bench['functions'][func][var]['timings'] = means
def do_for_all_timings(bench, callback):
    """Invoke a callback on every function variant that has timings

    Walks every function under bench['functions'] and every variant of
    that function.  Variants without a 'timings' key are skipped; for the
    others the callback is called as callback(bench, func, variant).

    Args:
        bench: The benchmark object
        callback: The callback function
    """
    for func_name in bench['functions'].keys():
        variants = bench['functions'][func_name]
        for variant_name in variants.keys():
            if 'timings' in variants[variant_name].keys():
                callback(bench, func_name, variant_name)
def compress_timings(points):
    """Replace runs of close timing values with a single mean value

    Applies split_list to every function variant in points that has a
    'timings' entry, by way of do_for_all_timings.  See split_list for
    details on how the values are clubbed together.

    Args:
        points: The set of points.
    """
    do_for_all_timings(bench=points, callback=split_list)
def parse_bench(filename, schema_filename):
    """Load a benchmark output file and validate it against a schema

    Both files are read as json.  The schema is loaded first, then the
    benchmark output, and the benchmark object is then checked against the
    schema with the validator module.  Any error raised while opening,
    parsing or validating is passed on to the caller.

    Args:
        filename: Name of the benchmark output file.
        schema_filename: Name of the json schema file to validate against.
    Return:
        The bench dictionary.
    """
    def load_json(path):
        # Read one json document; the file is closed when the block exits.
        with open(path, 'r') as stream:
            return json.load(stream)

    schema = load_json(schema_filename)
    bench = load_json(filename)
    validator.validate(bench, schema)
    return bench