syslog: Fix large messages (BZ#29536)
[glibc.git] / benchtests / scripts / plot_strings.py
blob2504c6939b24a22bd806968e6c446b83397c14e9
1 #!/usr/bin/python3
2 # Plot GNU C Library string microbenchmark output.
3 # Copyright (C) 2019-2022 Free Software Foundation, Inc.
4 # This file is part of the GNU C Library.
6 # The GNU C Library is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU Lesser General Public
8 # License as published by the Free Software Foundation; either
9 # version 2.1 of the License, or (at your option) any later version.
11 # The GNU C Library is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public
17 # License along with the GNU C Library; if not, see
18 # <https://www.gnu.org/licenses/>.
19 """Plot string microbenchmark results.
21 Given a benchmark results file in JSON format and a benchmark schema file,
22 plot the benchmark timings in one of the available representations.
A separate figure is generated and saved to a file for each 'results' array
found in the benchmark results file. Output filenames and plot titles
are derived from the metadata found in the benchmark results file.
"""
27 """
28 import argparse
29 from collections import defaultdict
30 import json
31 import matplotlib as mpl
32 import numpy as np
33 import os
34 import sys
36 try:
37 import jsonschema as validator
38 except ImportError:
39 print("Could not find jsonschema module.")
40 raise
42 # Use pre-selected markers for plotting lines to improve readability
43 markers = [".", "x", "^", "+", "*", "v", "1", ">", "s"]
45 # Benchmark variants for which the x-axis scale should be logarithmic
46 log_variants = {"powers of 2"}
def gmean(numbers):
    """Compute geometric mean.

    Args:
        numbers: 2-D list of numbers
    Return:
        numpy array with geometric means of numbers along each column
    """
    # Use the builtin complex type: the np.complex alias was deprecated in
    # NumPy 1.20 and removed in 1.24, so np.array(..., dtype=np.complex)
    # raises AttributeError on current NumPy.  Complex dtype is kept so that
    # the n-th root of a negative product does not produce NaN.
    a = np.array(numbers, dtype=complex)
    means = a.prod(0) ** (1.0 / len(a))
    return np.real(means)
def relativeDifference(x, x_reference):
    """Compute per-element relative difference between each row of
    a matrix and an array of reference values.

    Args:
        x: numpy matrix of shape (n, m)
        x_reference: numpy array of size m
    Return:
        relative difference between rows of x and x_reference (in %)
    """
    # (x - ref) / ref, expressed as a percentage.
    scaled_diff = np.multiply(100.0, np.subtract(x, x_reference))
    return np.divide(scaled_diff, x_reference)
def plotTime(timings, routine, bench_variant, title, outpath):
    """Plot absolute timing values.

    Args:
        timings: timings to plot
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    plt.figure()

    # Hide the absolute timing numbers on the y-axis unless the user
    # explicitly asked for them with --values.
    if not args.values:
        plt.axes().yaxis.set_major_formatter(plt.NullFormatter())

    plt.ylabel("timing")

    title_final = "%s %s benchmark timings\n%s" % \
                  (routine, bench_variant, title)
    filename = "%s_%s_%s%s" % (routine, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return timings, title_final, outpath_final
def plotRelative(timings, all_timings, routine, ifuncs, bench_variant,
                 title, outpath):
    """Plot timing values relative to a chosen ifunc.

    Args:
        timings: timings to plot
        all_timings: all collected timings
        routine: benchmarked string routine name
        ifuncs: names of ifuncs tested
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    # Baseline is the user-chosen ifunc (with leading underscores stripped,
    # matching how ifunc names were normalized), or the first ifunc tested.
    baseline = args.baseline.replace("__", "") if args.baseline \
               else ifuncs[0]
    baseline_index = ifuncs.index(baseline)

    # Express every row as a percentage difference from the baseline row.
    y = relativeDifference(timings, all_timings[baseline_index])

    plt.figure()
    # Shade the band of differences considered insignificant.
    plt.axhspan(-args.threshold, args.threshold, color="lightgray", alpha=0.3)
    plt.axhline(0, color="k", linestyle="--", linewidth=0.4)
    plt.ylabel("relative timing (in %)")

    title_final = "Timing comparison against %s\nfor %s benchmark, %s" % \
                  (baseline, bench_variant, title)
    filename = "%s_%s_%s%s" % (baseline, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return y, title_final, outpath_final
def plotMax(timings, routine, bench_variant, title, outpath):
    """Plot results as percentage of the maximum ifunc performance.

    The optimal ifunc is computed on a per-parameter-value basis.
    Performance is computed as 1/timing.

    Args:
        timings: timings to plot
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    # Performance is the inverse of timing; find the best ifunc per column,
    # then express every ifunc as a percentage of that maximum (max == 100%).
    perf = np.reciprocal(timings)
    best_perf = perf.max(axis=0)
    y = 100.0 + relativeDifference(perf, best_perf)

    plt.figure()
    plt.axhline(100.0, color="k", linestyle="--", linewidth=0.4)
    plt.ylabel("1/timing relative to max (in %)")

    title_final = "Performance comparison against max for %s\n%s " \
                  "benchmark, %s" % (routine, bench_variant, title)
    filename = "%s_%s_%s%s" % (routine, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return y, title_final, outpath_final
def plotThroughput(timings, params, routine, bench_variant, title, outpath):
    """Plot throughput.

    Throughput is computed as the varied parameter value over timing.

    Args:
        timings: timings to plot
        params: varied parameter values
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    # Throughput: parameter value (e.g. length) processed per unit of timing.
    y = np.divide(params, timings)

    plt.figure()

    # Hide the absolute throughput numbers unless --values was given.
    if not args.values:
        plt.axes().yaxis.set_major_formatter(plt.NullFormatter())

    plt.ylabel("%s / timing" % args.key)

    title_final = "%s %s benchmark throughput results\n%s" % \
                  (routine, bench_variant, title)
    filename = "%s_%s_%s%s" % (routine, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return y, title_final, outpath_final
def finishPlot(x, y, title, outpath, x_scale, plotted_ifuncs):
    """Finish generating current Figure.

    Args:
        x: x-axis values
        y: y-axis values
        title: figure title
        outpath: output file path
        x_scale: x-axis scale
        plotted_ifuncs: names of ifuncs to plot
    """
    plt.xlabel(args.key)
    plt.xscale(x_scale)
    plt.title(title)

    plt.grid(color="k", linestyle=args.grid, linewidth=0.5, alpha=0.5)

    # One line per ifunc, cycling through the preselected marker set so
    # that overlapping lines remain distinguishable.
    for i, ifunc_name in enumerate(plotted_ifuncs):
        plt.plot(x, y[i], marker=markers[i % len(markers)],
                 label=ifunc_name)

    plt.legend(loc="best", fontsize="small")
    plt.savefig("%s_%s.%s" % (outpath, x_scale, args.extension),
                format=args.extension, dpi=args.resolution)

    # Only open an interactive window when the user asked for it.
    if args.display:
        plt.show()

    plt.close()
def plotRecursive(json_iter, routine, ifuncs, bench_variant, title, outpath,
                  x_scale):
    """Plot benchmark timings.

    Recursively walks the nested 'variants' arrays of the benchmark JSON,
    accumulating variant names into the figure title and output path, and
    generates one figure per 'results' array found.

    Args:
        json_iter: reference to json object
        routine: benchmarked string routine name
        ifuncs: names of ifuncs tested
        bench_variant: top-level benchmark variant name
        title: figure's title (generated so far)
        outpath: output file path (generated so far)
        x_scale: x-axis scale
    """
    # RECURSIVE CASE: 'variants' array found
    if "variants" in json_iter:
        # Continue recursive search for 'results' array. Record the
        # benchmark variant (configuration) in order to customize
        # the title, filename and X-axis scale for the generated figure.
        for variant in json_iter["variants"]:
            new_title = "%s%s, " % (title, variant["name"])
            new_outpath = "%s_%s" % (outpath, variant["name"].replace(" ", "_"))
            # Variants listed in log_variants (e.g. "powers of 2") force a
            # logarithmic x-axis for all figures below this point.
            new_x_scale = "log" if variant["name"] in log_variants else x_scale

            plotRecursive(variant, routine, ifuncs, bench_variant, new_title,
                          new_outpath, new_x_scale)
        return

    # BASE CASE: 'results' array found
    domain = []
    timings = defaultdict(list)

    # Collect timings, grouped by the varied parameter value (args.key);
    # a parameter value may occur more than once.
    for result in json_iter["results"]:
        domain.append(result[args.key])
        timings[result[args.key]].append(result["timings"])

    # np.unique also sorts the parameter values in ascending order.
    domain = np.unique(np.array(domain))
    averages = []

    # Compute geometric mean if there are multiple timings for each
    # parameter value.
    for parameter in domain:
        averages.append(gmean(timings[parameter]))

    # Transpose so that rows correspond to ifuncs and columns to
    # parameter values.
    averages = np.array(averages).transpose()

    # Choose ifuncs to plot.  args.ifuncs defaults to the string "all";
    # otherwise it is a list of ifunc names given on the command line.
    if isinstance(args.ifuncs, str):
        plotted_ifuncs = ifuncs
    else:
        plotted_ifuncs = [x.replace("__", "") for x in args.ifuncs]

    plotted_indices = [ifuncs.index(x) for x in plotted_ifuncs]
    plotted_vals = averages[plotted_indices,:]

    # Plotting logic specific to each plot type.  Each helper creates the
    # figure and returns the y-values plus the final title and output path.
    if args.plot == "time":
        codomain, title, outpath = plotTime(plotted_vals, routine,
                                            bench_variant, title, outpath)
    elif args.plot == "rel":
        codomain, title, outpath = plotRelative(plotted_vals, averages, routine,
                                                ifuncs, bench_variant, title,
                                                outpath)
    elif args.plot == "max":
        codomain, title, outpath = plotMax(plotted_vals, routine,
                                           bench_variant, title, outpath)
    elif args.plot == "thru":
        codomain, title, outpath = plotThroughput(plotted_vals, domain, routine,
                                                  bench_variant, title, outpath)

    # Plotting logic shared between plot types
    finishPlot(domain, codomain, title, outpath, x_scale, plotted_ifuncs)
def main(args):
    """Program Entry Point.

    Args:
        args: command line arguments (excluding program name)
    """
    # Select non-GUI matplotlib backend if interactive display is disabled
    if not args.display:
        mpl.use("Agg")

    # pyplot must be imported after the backend is chosen, and is made
    # global so that the plotting helpers can use it.
    global plt
    import matplotlib.pyplot as plt

    with open(args.schema, "r") as f:
        schema = json.load(f)

    for filename in args.bench:
        # '-' means read a single results file from standard input.
        if filename == '-':
            bench = json.load(sys.stdin)
        else:
            with open(filename, "r") as f:
                bench = json.load(f)

        validator.validate(bench, schema)

        for function in bench["functions"]:
            properties = bench["functions"][function]
            bench_variant = properties["bench-variant"]
            # Strip double underscores from internal ifunc names for
            # nicer labels and filenames.
            ifuncs = [x.replace("__", "") for x in properties["ifuncs"]]

            plotRecursive(properties, function, ifuncs,
                          bench_variant, "", "", args.logarithmic)
""" main() """
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description=
             "Plot string microbenchmark results",
             formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Required parameter
    parser.add_argument("bench", nargs="+",
                        help="benchmark results file(s) in json format, " \
                        "and/or '-' as a benchmark result file from stdin")

    # Optional parameters
    parser.add_argument("-b", "--baseline", type=str,
                        help="baseline ifunc for 'rel' plot")
    parser.add_argument("-d", "--display", action="store_true",
                        help="display figures")
    parser.add_argument("-e", "--extension", type=str, default="png",
                        choices=["png", "pdf", "svg"],
                        help="output file(s) extension")
    # Grid is implemented by passing the chosen value as the linestyle to
    # plt.grid(): "" draws nothing, "-" draws solid grid lines.
    parser.add_argument("-g", "--grid", action="store_const", default="",
                        const="-", help="show grid lines")
    # Default "all" (a plain string, not a list) is how plotRecursive
    # detects that no explicit ifunc selection was made.
    parser.add_argument("-i", "--ifuncs", nargs="+", default="all",
                        help="ifuncs to plot")
    parser.add_argument("-k", "--key", type=str, default="length",
                        help="key to access the varied parameter")
    parser.add_argument("-l", "--logarithmic", action="store_const",
                        default="linear", const="log",
                        help="use logarithmic x-axis scale")
    parser.add_argument("-o", "--outdir", type=str, default=os.getcwd(),
                        help="output directory")
    parser.add_argument("-p", "--plot", type=str, default="time",
                        choices=["time", "rel", "max", "thru"],
                        help="plot absolute timings, relative timings, " \
                        "performance relative to max, or throughput")
    parser.add_argument("-r", "--resolution", type=int, default=100,
                        help="dpi resolution for the generated figures")
    # Schema file lives next to this script by default.
    parser.add_argument("-s", "--schema", type=str,
                        default=os.path.join(os.path.dirname(
                        os.path.realpath(__file__)),
                        "benchout_strings.schema.json"),
                        help="schema file to validate the results file.")
    parser.add_argument("-t", "--threshold", type=int, default=5,
                        help="threshold to mark in 'rel' graph (in %%)")
    parser.add_argument("-v", "--values", action="store_true",
                        help="show actual values")

    args = parser.parse_args()
    main(args)