syslog: Fix large messages (BZ#29536)
[glibc.git] / benchtests / scripts / plot_strings.py
blob2504c6939b24a22bd806968e6c446b83397c14e9
1 #!/usr/bin/python3
2 # Plot GNU C Library string microbenchmark output.
3 # Copyright (C) 2019-2022 Free Software Foundation, Inc.
4 # This file is part of the GNU C Library.
6 # The GNU C Library is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU Lesser General Public
8 # License as published by the Free Software Foundation; either
9 # version 2.1 of the License, or (at your option) any later version.
11 # The GNU C Library is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public
17 # License along with the GNU C Library; if not, see
18 # <https://www.gnu.org/licenses/>.
19 """Plot string microbenchmark results.
21 Given a benchmark results file in JSON format and a benchmark schema file,
22 plot the benchmark timings in one of the available representations.
A separate figure is generated and saved to a file for each 'results' array
found in the benchmark results file. Output filenames and plot titles
are derived from the metadata found in the benchmark results file.
"""
27 """
28 import argparse
29 from collections import defaultdict
30 import json
31 import matplotlib as mpl
32 import numpy as np
33 import os
34 import sys
36 try:
37 import jsonschema as validator
38 except ImportError:
39 print("Could not find jsonschema module.")
40 raise
42 # Use pre-selected markers for plotting lines to improve readability
43 markers = [".", "x", "^", "+", "*", "v", "1", ">", "s"]
45 # Benchmark variants for which the x-axis scale should be logarithmic
46 log_variants = {"powers of 2"}
def gmean(numbers):
    """Compute geometric mean.

    Args:
        numbers: 2-D list of numbers
    Return:
        numpy array with geometric means of numbers along each column
    """
    # Use the builtin complex type: the np.complex alias was deprecated in
    # NumPy 1.20 and removed in 1.24, so np.array(..., dtype=np.complex)
    # raises AttributeError on current NumPy.  Complex dtype is kept so that
    # the n-th root of a negative product does not produce NaN.
    a = np.array(numbers, dtype=complex)
    means = a.prod(0) ** (1.0 / len(a))
    return np.real(means)
def relativeDifference(x, x_reference):
    """Compute per-element relative difference between each row of
    a matrix and an array of reference values.

    Args:
        x: numpy matrix of shape (n, m)
        x_reference: numpy array of size m
    Return:
        relative difference between rows of x and x_reference (in %)
    """
    # (x - ref) / ref, expressed as a percentage.
    scaled_diff = np.multiply(100.0, np.subtract(x, x_reference))
    return np.divide(scaled_diff, x_reference)
def plotTime(timings, routine, bench_variant, title, outpath):
    """Plot absolute timing values.

    Args:
        timings: timings to plot
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    plt.figure()

    # Hide the absolute timing numbers on the y-axis unless the user
    # explicitly asked for them with --values.
    if not args.values:
        plt.axes().yaxis.set_major_formatter(plt.NullFormatter())

    plt.ylabel("timing")

    title_final = "%s %s benchmark timings\n%s" % \
                  (routine, bench_variant, title)
    filename = "%s_%s_%s%s" % (routine, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return timings, title_final, outpath_final
def plotRelative(timings, all_timings, routine, ifuncs, bench_variant,
                 title, outpath):
    """Plot timing values relative to a chosen ifunc.

    Args:
        timings: timings to plot
        all_timings: all collected timings
        routine: benchmarked string routine name
        ifuncs: names of ifuncs tested
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    # Baseline is the user-chosen ifunc (with leading underscores stripped,
    # matching how ifunc names were normalized), or the first ifunc tested.
    baseline = args.baseline.replace("__", "") if args.baseline \
               else ifuncs[0]
    baseline_index = ifuncs.index(baseline)

    # Express every row as a percentage difference from the baseline row.
    y = relativeDifference(timings, all_timings[baseline_index])

    plt.figure()
    # Shade the band of differences considered insignificant.
    plt.axhspan(-args.threshold, args.threshold, color="lightgray", alpha=0.3)
    plt.axhline(0, color="k", linestyle="--", linewidth=0.4)
    plt.ylabel("relative timing (in %)")

    title_final = "Timing comparison against %s\nfor %s benchmark, %s" % \
                  (baseline, bench_variant, title)
    filename = "%s_%s_%s%s" % (baseline, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return y, title_final, outpath_final
def plotMax(timings, routine, bench_variant, title, outpath):
    """Plot results as percentage of the maximum ifunc performance.

    The optimal ifunc is computed on a per-parameter-value basis.
    Performance is computed as 1/timing.

    Args:
        timings: timings to plot
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    # Performance is the inverse of timing; find the best ifunc per column,
    # then express every ifunc as a percentage of that maximum (max == 100%).
    perf = np.reciprocal(timings)
    best_perf = perf.max(axis=0)
    y = 100.0 + relativeDifference(perf, best_perf)

    plt.figure()
    plt.axhline(100.0, color="k", linestyle="--", linewidth=0.4)
    plt.ylabel("1/timing relative to max (in %)")

    title_final = "Performance comparison against max for %s\n%s " \
                  "benchmark, %s" % (routine, bench_variant, title)
    filename = "%s_%s_%s%s" % (routine, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return y, title_final, outpath_final
def plotThroughput(timings, params, routine, bench_variant, title, outpath):
    """Plot throughput.

    Throughput is computed as the varied parameter value over timing.

    Args:
        timings: timings to plot
        params: varied parameter values
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    # Throughput: parameter value (e.g. length) processed per unit of timing.
    y = np.divide(params, timings)

    plt.figure()

    # Hide the absolute throughput numbers unless --values was given.
    if not args.values:
        plt.axes().yaxis.set_major_formatter(plt.NullFormatter())

    plt.ylabel("%s / timing" % args.key)

    title_final = "%s %s benchmark throughput results\n%s" % \
                  (routine, bench_variant, title)
    filename = "%s_%s_%s%s" % (routine, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return y, title_final, outpath_final
def finishPlot(x, y, title, outpath, x_scale, plotted_ifuncs):
    """Finish generating current Figure.

    Args:
        x: x-axis values
        y: y-axis values
        title: figure title
        outpath: output file path
        x_scale: x-axis scale
        plotted_ifuncs: names of ifuncs to plot
    """
    plt.xlabel(args.key)
    plt.xscale(x_scale)
    plt.title(title)

    plt.grid(color="k", linestyle=args.grid, linewidth=0.5, alpha=0.5)

    # One line per ifunc, cycling through the preselected marker set so
    # that overlapping lines remain distinguishable.
    for i, ifunc_name in enumerate(plotted_ifuncs):
        plt.plot(x, y[i], marker=markers[i % len(markers)],
                 label=ifunc_name)

    plt.legend(loc="best", fontsize="small")
    plt.savefig("%s_%s.%s" % (outpath, x_scale, args.extension),
                format=args.extension, dpi=args.resolution)

    # Only open an interactive window when the user asked for it.
    if args.display:
        plt.show()

    plt.close()
def plotRecursive(json_iter, routine, ifuncs, bench_variant, title, outpath,
                  x_scale):
    """Plot benchmark timings.

    Recursively walks the nested 'variants' arrays of the benchmark JSON,
    accumulating variant names into the figure title and output path, and
    generates one figure per 'results' array found.

    Args:
        json_iter: reference to json object
        routine: benchmarked string routine name
        ifuncs: names of ifuncs tested
        bench_variant: top-level benchmark variant name
        title: figure's title (generated so far)
        outpath: output file path (generated so far)
        x_scale: x-axis scale
    """
    # RECURSIVE CASE: 'variants' array found
    if "variants" in json_iter:
        # Continue recursive search for 'results' array. Record the
        # benchmark variant (configuration) in order to customize
        # the title, filename and X-axis scale for the generated figure.
        for variant in json_iter["variants"]:
            new_title = "%s%s, " % (title, variant["name"])
            new_outpath = "%s_%s" % (outpath, variant["name"].replace(" ", "_"))
            # Variants listed in log_variants (e.g. "powers of 2") force a
            # logarithmic x-axis for all figures below this point.
            new_x_scale = "log" if variant["name"] in log_variants else x_scale

            plotRecursive(variant, routine, ifuncs, bench_variant, new_title,
                          new_outpath, new_x_scale)
        return

    # BASE CASE: 'results' array found
    domain = []
    timings = defaultdict(list)

    # Collect timings, grouped by the varied parameter value (args.key);
    # a parameter value may occur more than once.
    for result in json_iter["results"]:
        domain.append(result[args.key])
        timings[result[args.key]].append(result["timings"])

    # np.unique also sorts the parameter values in ascending order.
    domain = np.unique(np.array(domain))
    averages = []

    # Compute geometric mean if there are multiple timings for each
    # parameter value.
    for parameter in domain:
        averages.append(gmean(timings[parameter]))

    # Transpose so that rows correspond to ifuncs and columns to
    # parameter values.
    averages = np.array(averages).transpose()

    # Choose ifuncs to plot.  args.ifuncs defaults to the string "all";
    # otherwise it is a list of ifunc names given on the command line.
    if isinstance(args.ifuncs, str):
        plotted_ifuncs = ifuncs
    else:
        plotted_ifuncs = [x.replace("__", "") for x in args.ifuncs]

    plotted_indices = [ifuncs.index(x) for x in plotted_ifuncs]
    plotted_vals = averages[plotted_indices,:]

    # Plotting logic specific to each plot type.  Each helper creates the
    # figure and returns the y-values plus the final title and output path.
    if args.plot == "time":
        codomain, title, outpath = plotTime(plotted_vals, routine,
                                            bench_variant, title, outpath)
    elif args.plot == "rel":
        codomain, title, outpath = plotRelative(plotted_vals, averages, routine,
                                                ifuncs, bench_variant, title,
                                                outpath)
    elif args.plot == "max":
        codomain, title, outpath = plotMax(plotted_vals, routine,
                                           bench_variant, title, outpath)
    elif args.plot == "thru":
        codomain, title, outpath = plotThroughput(plotted_vals, domain, routine,
                                                  bench_variant, title, outpath)

    # Plotting logic shared between plot types
    finishPlot(domain, codomain, title, outpath, x_scale, plotted_ifuncs)
def main(args):
    """Program Entry Point.

    Args:
        args: command line arguments (excluding program name)
    """
    # Select non-GUI matplotlib backend if interactive display is disabled
    if not args.display:
        mpl.use("Agg")

    # pyplot must be imported after the backend is chosen, and is made
    # global so that the plotting helpers can use it.
    global plt
    import matplotlib.pyplot as plt

    with open(args.schema, "r") as f:
        schema = json.load(f)

    for filename in args.bench:
        # '-' means read a single results file from standard input.
        if filename == '-':
            bench = json.load(sys.stdin)
        else:
            with open(filename, "r") as f:
                bench = json.load(f)

        validator.validate(bench, schema)

        for function in bench["functions"]:
            properties = bench["functions"][function]
            bench_variant = properties["bench-variant"]
            # Strip double underscores from internal ifunc names for
            # nicer labels and filenames.
            ifuncs = [x.replace("__", "") for x in properties["ifuncs"]]

            plotRecursive(properties, function, ifuncs,
                          bench_variant, "", "", args.logarithmic)
""" main() """
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description=
             "Plot string microbenchmark results",
             formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Required parameter
    parser.add_argument("bench", nargs="+",
                        help="benchmark results file(s) in json format, " \
                        "and/or '-' as a benchmark result file from stdin")

    # Optional parameters
    parser.add_argument("-b", "--baseline", type=str,
                        help="baseline ifunc for 'rel' plot")
    parser.add_argument("-d", "--display", action="store_true",
                        help="display figures")
    parser.add_argument("-e", "--extension", type=str, default="png",
                        choices=["png", "pdf", "svg"],
                        help="output file(s) extension")
    # Grid is implemented by passing the chosen value as the linestyle to
    # plt.grid(): "" draws nothing, "-" draws solid grid lines.
    parser.add_argument("-g", "--grid", action="store_const", default="",
                        const="-", help="show grid lines")
    # Default "all" (a plain string, not a list) is how plotRecursive
    # detects that no explicit ifunc selection was made.
    parser.add_argument("-i", "--ifuncs", nargs="+", default="all",
                        help="ifuncs to plot")
    parser.add_argument("-k", "--key", type=str, default="length",
                        help="key to access the varied parameter")
    parser.add_argument("-l", "--logarithmic", action="store_const",
                        default="linear", const="log",
                        help="use logarithmic x-axis scale")
    parser.add_argument("-o", "--outdir", type=str, default=os.getcwd(),
                        help="output directory")
    parser.add_argument("-p", "--plot", type=str, default="time",
                        choices=["time", "rel", "max", "thru"],
                        help="plot absolute timings, relative timings, " \
                        "performance relative to max, or throughput")
    parser.add_argument("-r", "--resolution", type=int, default=100,
                        help="dpi resolution for the generated figures")
    # Schema file lives next to this script by default.
    parser.add_argument("-s", "--schema", type=str,
                        default=os.path.join(os.path.dirname(
                        os.path.realpath(__file__)),
                        "benchout_strings.schema.json"),
                        help="schema file to validate the results file.")
    parser.add_argument("-t", "--threshold", type=int, default=5,
                        help="threshold to mark in 'rel' graph (in %%)")
    parser.add_argument("-v", "--values", action="store_true",
                        help="show actual values")

    args = parser.parse_args()
    main(args)