2 * Copyright (c) 2018 Jiri Svoboda
3 * Copyright (c) 2018 Vojtech Horky
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup hbench
46 #include <str_error.h>
48 #include <types/casting.h>
51 #define MAX_ERROR_STR_LENGTH 1024
// Report a single benchmark run: pass it to csv_report_add_entry() and print
// a one-line summary (operation count, duration in microseconds and, when
// the duration is non-zero, throughput in operations per second) to stdout.
//
// @param info          Run whose stopwatch supplies the elapsed time.
// @param run_index     Forwarded unchanged to csv_report_add_entry();
//                      run_benchmark() in this file passes -1 for warm-up
//                      runs and the sample index for measured runs.
// @param bench         Forwarded unchanged to csv_report_add_entry().
// @param workload_size Number of operations executed in the run.
53 static void short_report(bench_run_t
*info
, int run_index
,
54 benchmark_t
*bench
, uint64_t workload_size
)
56 csv_report_add_entry(info
, run_index
, bench
, workload_size
);
// Elapsed time of the run, converted from nanoseconds to microseconds.
58 usec_t duration_usec
= NSEC2USEC(stopwatch_get_nanos(&info
->stopwatch
));
// NOTE(review): duration_usec is printed with %llu - confirm that usec_t
// is compatible with unsigned long long on every supported target.
60 printf("Completed %" PRIu64
" operations in %llu us",
61 workload_size
, duration_usec
);
// Throughput is derived only for a non-zero (microsecond) duration.
62 if (duration_usec
> 0) {
// The stopwatch is read again to get the full nanosecond resolution
// instead of reusing the value already rounded to microseconds.
63 double nanos
= stopwatch_get_nanos(&info
->stopwatch
);
// ops/s = operations / seconds, with seconds = nanos / 1e9. The `l`
// suffix makes the divisor a long double constant.
64 double thruput
= (double) workload_size
/ (nanos
/ 1000000000.0l);
65 printf(", %.0f ops/s.\n", thruput
);
71 /** Estimate square root value.
73 * @param value The value to compute square root of.
74 * @param precision Required precision (e.g. 0.00001).
78 * This is a temporary solution until we have proper sqrt() implementation
81 * The algorithm uses Babylonian method [1].
83 * [1] https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method
// Iteratively approximate the square root of `value`; iteration stops once
// two successive estimates differ by at most `precision`.
// NOTE(review): nothing visible here guards against a negative `value` or a
// non-positive `precision` - confirm that callers never pass either.
85 static double estimate_square_root(double value
, double precision
)
// prev_estimate starts 10 * precision away from the initial estimate so
// that (for a positive precision) the loop condition holds on first entry.
// NOTE(review): `estimate` must already hold the starting guess at this
// point - confirm its initializer.
88 double prev_estimate
= estimate
+ 10 * precision
;
90 while (fabs(estimate
- prev_estimate
) > precision
) {
91 prev_estimate
= estimate
;
// Babylonian step: the new estimate is the average of the previous
// estimate and value / previous estimate.
92 estimate
= (prev_estimate
+ value
/ prev_estimate
) / 2.;
98 /** Compute available statistics from given stopwatches.
100 * We compute arithmetic mean for average duration of the workload and harmonic
101 * mean for average thruput. Note that harmonic mean is necessary to compute
102 * average throughput correctly - consider the following example:
103 * - we always run 60 operations,
104 * - first run executes in 30 s (i.e. 2 ops/s)
105 * - and second one in 10 s (6 ops/s).
106 * Then, naively, average throughput would be (2+6)/2 = 4 [ops/s]. However, we
107 * actually executed 60 + 60 ops in 30 + 10 seconds. So the actual average
108 * throughput is 3 ops/s (which is exactly what harmonic mean means).
// Aggregate the stopwatch readings of `run_count` runs into summary values.
//
// @param runs               Array of runs; only runs[i].stopwatch is read.
// @param run_count          Number of entries in `runs`.
// @param workload_size      Operations executed in each run.
// @param precision          Precision handed to estimate_square_root().
// @param out_duration_avg   Receives nanos_sum / run_count (nanoseconds).
// @param out_duration_sigma Receives either the estimated square root of
//                           sigma2 or NAN.
// @param out_thruput_avg    Receives the averaged throughput in operations
//                           per nanosecond (workload_size / nanos based).
//
// NOTE(review): run_count == 0 divides by zero in the averages and
// run_count == 1 makes the sigma2 denominator zero - confirm that callers
// always supply at least two runs or that the resulting NaN/inf is intended.
111 static void compute_stats(bench_run_t
*runs
, size_t run_count
,
112 uint64_t workload_size
, double precision
, double *out_duration_avg
,
113 double *out_duration_sigma
, double *out_thruput_avg
)
115 double inv_thruput_sum
= 0.0;
116 double nanos_sum
= 0.0;
117 double nanos_sum2
= 0.0;
119 for (size_t i
= 0; i
< run_count
; i
++) {
120 double nanos
= stopwatch_get_nanos(&runs
[i
].stopwatch
);
// Per-run throughput in operations per nanosecond.
121 double thruput
= (double) workload_size
/ nanos
;
// Sum of reciprocals (time per operation), used for the throughput
// average computed at the end of the function.
123 inv_thruput_sum
+= 1.0 / thruput
;
// Sum of squared durations, used for the variance below.
// NOTE(review): nanos_sum is presumably accumulated next to this
// statement - confirm.
125 nanos_sum2
+= nanos
* nanos
;
127 *out_duration_avg
= nanos_sum
/ run_count
;
// Variance estimate: (sum(x^2) - sum(x) * mean) / (n - 1).
128 double sigma2
= (nanos_sum2
- nanos_sum
* (*out_duration_avg
)) /
129 ((double) run_count
- 1);
130 // FIXME: implement sqrt properly
// NOTE(review): the two assignments below are alternatives; presumably NAN
// is stored when sigma2 is not positive - confirm the selecting condition.
132 *out_duration_sigma
= estimate_square_root(sigma2
, precision
);
134 *out_duration_sigma
= NAN
;
// Reciprocal of the mean reciprocal throughput, i.e.
// run_count / sum(1 / thruput).
136 *out_thruput_avg
= 1.0 / (inv_thruput_sum
/ run_count
);
// Print one summary line for a series of measured runs: workload size,
// average duration and its deviation in microseconds, average throughput in
// operations per second, and the number of samples.
//
// @param runs          Measured runs, forwarded to compute_stats().
// @param run_count     Number of entries in `runs`.
// @param bench         Not referenced by the statements shown here.
// @param workload_size Operations executed in each run.
139 static void summary_stats(bench_run_t
*runs
, size_t run_count
,
140 benchmark_t
*bench
, uint64_t workload_size
)
142 double duration_avg
, duration_sigma
, thruput_avg
;
// 0.001 is the precision requested for the square-root estimate.
143 compute_stats(runs
, run_count
, workload_size
, 0.001,
144 &duration_avg
, &duration_sigma
, &thruput_avg
);
// Unit conversion for display: nanoseconds / 1000 -> microseconds and
// operations per nanosecond * 1e9 -> operations per second.
146 printf("Average: %" PRIu64
" ops in %.0f us (sd %.0f us); "
147 "%.0f ops/s; Samples: %zu\n",
148 workload_size
, duration_avg
/ 1000.0, duration_sigma
/ 1000.0,
149 thruput_avg
* 1000000000.0, run_count
);
// Run one benchmark: optional setup, a warm-up phase that searches for a
// workload size, env->run_count measured runs, a printed summary, and
// optional teardown.
//
// @param env   Environment; minimal_run_duration_nanos and run_count are
//              read here and the pointer is passed to the benchmark hooks.
// @param bench Benchmark whose setup, entry and teardown hooks are invoked
//              (setup and teardown only when they are not NULL).
// @return      bool; NOTE(review): presumably true only when every phase
//              succeeded - confirm against the return statements.
152 static bool run_benchmark(bench_env_t
*env
, benchmark_t
*bench
)
154 printf("Warm up and determine workload size...\n");
157 * We share this buffer across all runs as we know that it is
158 * used only on failure (and we abort after first error).
// One spare byte is allocated beyond MAX_ERROR_STR_LENGTH.
160 char *error_msg
= malloc(MAX_ERROR_STR_LENGTH
+ 1);
161 if (error_msg
== NULL
) {
162 printf("Out of memory!\n");
// Start with an empty error message.
165 str_cpy(error_msg
, MAX_ERROR_STR_LENGTH
, "");
// helper_run is the run object handed to the setup and teardown hooks.
167 bench_run_t helper_run
;
168 bench_run_init(&helper_run
, error_msg
, MAX_ERROR_STR_LENGTH
);
172 if (bench
->setup
!= NULL
) {
173 ret
= bench
->setup(env
, &helper_run
);
180 * Find workload size that is big enough to last few seconds.
181 * We also check that uint64_t is big enough.
183 uint64_t workload_size
= 0;
// NOTE(review): the loop bound includes bits == 64, for which the shift
// below would be undefined; the error text suggests that iteration bails
// out before shifting - confirm.
184 for (size_t bits
= 0; bits
<= 64; bits
++) {
186 str_cpy(error_msg
, MAX_ERROR_STR_LENGTH
, "Workload too small even for 1 << 63");
// Candidate workload sizes are the powers of two 1, 2, 4, ...
189 workload_size
= ((uint64_t) 1) << bits
;
192 bench_run_init(&run
, error_msg
, MAX_ERROR_STR_LENGTH
);
194 bool ok
= bench
->entry(env
, &run
, workload_size
);
// Warm-up runs are reported with run index -1.
198 short_report(&run
, -1, bench
, workload_size
);
200 nsec_t duration
= stopwatch_get_nanos(&run
.stopwatch
);
// A run longer than the configured minimum means the current
// workload size is large enough (presumably this ends the search).
201 if (duration
> env
->minimal_run_duration_nanos
) {
206 printf("Workload size set to %" PRIu64
", measuring %zu samples.\n",
207 workload_size
, env
->run_count
);
// One zero-initialised bench_run_t per measured sample.
209 bench_run_t
*runs
= calloc(env
->run_count
, sizeof(bench_run_t
));
211 snprintf(error_msg
, MAX_ERROR_STR_LENGTH
, "failed allocating memory");
214 for (size_t i
= 0; i
< env
->run_count
; i
++) {
215 bench_run_init(&runs
[i
], error_msg
, MAX_ERROR_STR_LENGTH
);
217 bool ok
= bench
->entry(env
, &runs
[i
], workload_size
);
// NOTE(review): i is a size_t passed to short_report()'s int
// run_index parameter - confirm the narrowing is acceptable.
222 short_report(&runs
[i
], i
, bench
, workload_size
);
225 summary_stats(runs
, env
->run_count
, bench
, workload_size
);
226 printf("\nBenchmark completed\n");
233 printf("Error: %s\n", error_msg
);
// Teardown receives the same helper_run that was given to setup.
237 if (bench
->teardown
!= NULL
) {
238 bool ok
= bench
->teardown(env
, &helper_run
);
240 printf("Error: %s\n", error_msg
);
// Run every entry of the benchmarks[] table through run_benchmark() and
// print an overall summary, including the names of failed benchmarks.
//
// @param env Environment forwarded to run_benchmark().
// @return    int; NOTE(review): presumably derived from the failure count -
//            confirm against the return statement.
250 static int run_benchmarks(bench_env_t
*env
)
252 unsigned int count_ok
= 0;
253 unsigned int count_fail
= 0;
// Comma-separated list of failed benchmark names; NULL while none failed.
255 char *failed_names
= NULL
;
257 printf("\n*** Running all benchmarks ***\n\n");
259 for (size_t it
= 0; it
< benchmark_count
; it
++) {
260 printf("%s (%s)\n", benchmarks
[it
]->name
, benchmarks
[it
]->desc
);
261 if (run_benchmark(env
, benchmarks
[it
])) {
// First recorded failure: the list starts as a copy of the name.
267 failed_names
= str_dup(benchmarks
[it
]->name
);
// Later failures: build "<existing list>, <name>" into a new string.
// NOTE(review): confirm that the previous failed_names buffer is
// released once the new string replaces it.
270 asprintf(&f
, "%s, %s", failed_names
, benchmarks
[it
]->name
);
272 printf("Out of memory.\n");
// The total is the sum of both counters.
281 printf("\nCompleted, %u benchmarks run, %u succeeded.\n",
282 count_ok
+ count_fail
, count_ok
);
284 printf("Failed benchmarks: %s\n", failed_names
);
// Print the name and description of every entry of the benchmarks[] table,
// followed by the special "*" entry that runs all benchmarks.
289 static void list_benchmarks(void)
// First pass: measure each benchmark name.
// NOTE(review): `len` is presumably the maximum of these lengths - confirm.
292 for (size_t i
= 0; i
< benchmark_count
; i
++) {
293 size_t len_now
= str_length(benchmarks
[i
]->name
);
// `len` is cast to int below for the `*` field width of printf.
298 assert(can_cast_size_t_to_int(len
) && "benchmark name length overflow");
// Second pass: names are left-justified in a column `len` wide.
300 for (size_t i
= 0; i
< benchmark_count
; i
++)
301 printf(" %-*s %s\n", (int) len
, benchmarks
[i
]->name
, benchmarks
[i
]->desc
);
303 printf(" %-*s Run all benchmarks\n", (int) len
, "*");
// Print the command-line usage text to stdout.
//
// @param progname Program name substituted into the "Usage:" line.
306 static void print_usage(const char *progname
)
308 printf("Usage: %s [options] <benchmark>\n", progname
);
// Each option is printed as two adjacent string literals: the option
// syntax followed by its description.
310 "Print this help and exit\n");
311 printf("-d, --duration MILLIS "
312 "Set minimal run duration (milliseconds)\n");
313 printf("-n, --count N "
314 "Set number of measured runs\n");
315 printf("-o, --output filename.csv "
316 "Store machine-readable data in filename.csv\n");
317 printf("-p, --param KEY=VALUE "
318 "Additional parameters for the benchmark\n");
319 printf("<benchmark> is one of the following:\n");
// Split a KEY=VALUE command-line argument and store the pair in the
// benchmark environment via bench_env_param_set().
//
// @param env Environment that receives the parameter.
// @param arg Argument text; passed to str_tok() as a non-const string.
//            NOTE(review): presumably str_tok() modifies it in place -
//            confirm.
323 static void handle_param_arg(bench_env_t
*env
, char *arg
)
// Tokenise on "=": the returned token is used as the key and `value`
// receives what str_tok() stores through its third argument.
// NOTE(review): confirm what `value` holds when arg contains no '='.
326 char *key
= str_tok(arg
, "=", &value
);
327 bench_env_param_set(env
, key
, value
);
// Program entry point: initialise the benchmark environment, parse the
// command-line options, optionally open a CSV report, then run either all
// benchmarks ("*") or the single benchmark named on the command line.
330 int main(int argc
, char *argv
[])
332 bench_env_t bench_env
;
333 errno_t rc
= bench_env_init(&bench_env
);
335 fprintf(stderr
, "Failed to initialize internal params structure: %s\n",
// Short options: -h takes no argument; -o, -p, -n and -d each require one.
340 const char *short_options
= "ho:p:n:d:";
341 struct option long_options
[] = {
342 { "duration", required_argument
, NULL
, 'd' },
// NOTE(review): --help is declared with optional_argument although the
// short form -h takes none - confirm this is intended.
343 { "help", optional_argument
, NULL
, 'h' },
344 { "count", required_argument
, NULL
, 'n' },
345 { "output", required_argument
, NULL
, 'o' },
346 { "param", required_argument
, NULL
, 'p' },
350 char *csv_output_filename
= NULL
;
// Option parsing continues while getopt_long() returns a positive value.
353 while ((opt
= getopt_long(argc
, argv
, short_options
, long_options
, NULL
)) > 0) {
// The -d value is read as milliseconds and stored as nanoseconds.
357 bench_env
.minimal_run_duration_nanos
= MSEC2NSEC(atoll(optarg
));
// NOTE(review): atoll() is not specified to report conversion errors
// through errno, and errno is not visibly cleared before the call -
// confirm this check rejects malformed input as intended.
358 if ((errno
!= EOK
) || (bench_env
.minimal_run_duration_nanos
<= 0)) {
359 fprintf(stderr
, "Invalid -d argument.\n");
// NOTE(review): run_count is printed with %zu elsewhere in this file,
// so the (nsec_t) cast looks inconsistent with the field's type, and a
// `<= 0` test on an unsigned field only detects zero - confirm.
368 bench_env
.run_count
= (nsec_t
) atoll(optarg
);
369 if ((errno
!= EOK
) || (bench_env
.run_count
<= 0)) {
370 fprintf(stderr
, "Invalid -n argument.\n");
// The CSV file name is only remembered here; the report is opened
// after option parsing has finished.
375 csv_output_filename
= optarg
;
378 handle_param_arg(&bench_env
, optarg
);
// Exactly one positional argument (the benchmark name) must remain.
386 if (optind
+ 1 != argc
) {
388 fprintf(stderr
, "Error: specify one benchmark to run or * for all.\n");
392 const char *benchmark
= argv
[optind
];
394 if (csv_output_filename
!= NULL
) {
// This `rc` is a new variable that shadows the one declared at the top
// of main().
395 errno_t rc
= csv_report_open(csv_output_filename
);
397 fprintf(stderr
, "Failed to open CSV report '%s': %s\n",
398 csv_output_filename
, str_error(rc
));
// "*" selects the whole benchmark table.
405 if (str_cmp(benchmark
, "*") == 0) {
406 exit_code
= run_benchmarks(&bench_env
);
// Otherwise look the benchmark up by exact name match.
408 bool benchmark_exists
= false;
409 for (size_t i
= 0; i
< benchmark_count
; i
++) {
410 if (str_cmp(benchmark
, benchmarks
[i
]->name
) == 0) {
411 benchmark_exists
= true;
// run_benchmark()'s bool result maps to exit code 0 or -1.
412 exit_code
= run_benchmark(&bench_env
, benchmarks
[i
]) ? 0 : -1;
416 if (!benchmark_exists
) {
417 printf("Unknown benchmark \"%s\"\n", benchmark
);
423 bench_env_cleanup(&bench_env
);