Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-4.0-pull-request...
[qemu/kevin.git] / tests / fp / fp-bench.c
blobf5bc5edebfe0b155a538133b474405cc8bd9d2f8
/*
 * fp-bench.c - A collection of simple floating point microbenchmarks.
 *
 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 */
9 #ifndef HW_POISON_H
10 #error Must define HW_POISON_H to work around TARGET_* poisoning
11 #endif
13 #include "qemu/osdep.h"
14 #include <math.h>
15 #include <fenv.h>
16 #include "qemu/timer.h"
17 #include "fpu/softfloat.h"
/*
 * Number of operations timed per batch; a large batch amortizes the
 * computation of random inputs.
 */
#define OPS_PER_ITER 50000

/* Size of the per-batch operand array and of the PRNG state array. */
#define MAX_OPERANDS 3

/* Initial PRNG state, one seed per operand slot. */
#define SEED_A 0xdeadfacedeadface
#define SEED_B 0xbadc0feebadc0fee
#define SEED_C 0xbeefdeadbeefdead
/*
 * Operations that can be benchmarked. The values index op_names[] and the
 * first dimension of bench_funcs[].
 */
enum op {
    OP_ADD = 0,
    OP_SUB,
    OP_MUL,
    OP_DIV,
    OP_FMA,
    OP_SQRT,
    OP_CMP,
    OP_MAX_NR, /* number of operations; not an operation itself */
};
39 static const char * const op_names[] = {
40 [OP_ADD] = "add",
41 [OP_SUB] = "sub",
42 [OP_MUL] = "mul",
43 [OP_DIV] = "div",
44 [OP_FMA] = "mulAdd",
45 [OP_SQRT] = "sqrt",
46 [OP_CMP] = "cmp",
47 [OP_MAX_NR] = NULL,
/*
 * Operand type used by a benchmark. bench() runs PREC_SINGLE/PREC_DOUBLE
 * with the host's float/double arithmetic and PREC_FLOAT32/PREC_FLOAT64
 * with the softfloat float32_*()/float64_*() functions.
 */
enum precision {
    PREC_SINGLE = 0,
    PREC_DOUBLE,
    PREC_FLOAT32,
    PREC_FLOAT64,
    PREC_MAX_NR, /* number of precisions; not a precision itself */
};
/* Rounding modes selectable with -r. The values index round_names[]. */
enum rounding {
    ROUND_EVEN = 0,
    ROUND_ZERO,
    ROUND_DOWN,
    ROUND_UP,
    ROUND_TIEAWAY,
    N_ROUND_MODES, /* number of rounding modes; not a mode itself */
};
67 static const char * const round_names[] = {
68 [ROUND_EVEN] = "even",
69 [ROUND_ZERO] = "zero",
70 [ROUND_DOWN] = "down",
71 [ROUND_UP] = "up",
72 [ROUND_TIEAWAY] = "tieaway",
/* Implementation under test, selectable with -t: softfloat or the host. */
enum tester {
    TESTER_SOFT = 0,
    TESTER_HOST,
    TESTER_MAX_NR, /* number of testers; not a tester itself */
};
81 static const char * const tester_names[] = {
82 [TESTER_SOFT] = "soft",
83 [TESTER_HOST] = "host",
84 [TESTER_MAX_NR] = NULL,
87 union fp {
88 float f;
89 double d;
90 float32 f32;
91 float64 f64;
92 uint64_t u64;
/* Only used through pointers in the function types below. */
struct op_state;

/* Operation callbacks returning a host float or a host double. */
typedef float (*float_func_t)(const struct op_state *s);
typedef double (*double_func_t)(const struct op_state *s);

/* Either flavour of operation callback. */
union fp_func {
    float_func_t float_func;
    double_func_t double_func;
};

/* Argument-less benchmark entry point, as stored in bench_funcs[]. */
typedef void (*bench_func_t)(void);
/* Descriptor of an operation; currently it only carries the name. */
struct op_desc {
    const char * const name;
};
111 #define DEFAULT_DURATION_SECS 1
113 static uint64_t random_ops[MAX_OPERANDS] = {
114 SEED_A, SEED_B, SEED_C,
116 static float_status soft_status;
117 static enum precision precision;
118 static enum op operation;
119 static enum tester tester;
120 static uint64_t n_completed_ops;
121 static unsigned int duration = DEFAULT_DURATION_SECS;
122 static int64_t ns_elapsed;
123 /* disable optimizations with volatile */
124 static volatile union fp res;
/*
 * One step of the xorshift64* generator.
 * From: https://en.wikipedia.org/wiki/Xorshift
 *
 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
 * guaranteed to be >= INT_MAX).
 *
 * @x: current generator state.
 * Returns the scrambled state; callers in this file feed it back in as the
 * next @x. A state of 0 maps to 0.
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t state = x;

    state = state ^ (state >> 12); /* a */
    state = state ^ (state << 25); /* b */
    state = state ^ (state >> 27); /* c */

    return state * multiplier;
}
139 static void update_random_ops(int n_ops, enum precision prec)
141 int i;
143 for (i = 0; i < n_ops; i++) {
144 uint64_t r = random_ops[i];
146 if (prec == PREC_SINGLE || PREC_FLOAT32) {
147 do {
148 r = xorshift64star(r);
149 } while (!float32_is_normal(r));
150 } else if (prec == PREC_DOUBLE || PREC_FLOAT64) {
151 do {
152 r = xorshift64star(r);
153 } while (!float64_is_normal(r));
154 } else {
155 g_assert_not_reached();
157 random_ops[i] = r;
161 static void fill_random(union fp *ops, int n_ops, enum precision prec,
162 bool no_neg)
164 int i;
166 for (i = 0; i < n_ops; i++) {
167 switch (prec) {
168 case PREC_SINGLE:
169 case PREC_FLOAT32:
170 ops[i].f32 = make_float32(random_ops[i]);
171 if (no_neg && float32_is_neg(ops[i].f32)) {
172 ops[i].f32 = float32_chs(ops[i].f32);
174 /* raise the exponent to limit the frequency of denormal results */
175 ops[i].f32 |= 0x40000000;
176 break;
177 case PREC_DOUBLE:
178 case PREC_FLOAT64:
179 ops[i].f64 = make_float64(random_ops[i]);
180 if (no_neg && float64_is_neg(ops[i].f64)) {
181 ops[i].f64 = float64_chs(ops[i].f64);
183 /* raise the exponent to limit the frequency of denormal results */
184 ops[i].f64 |= LIT64(0x4000000000000000);
185 break;
186 default:
187 g_assert_not_reached();
193 * The main benchmark function. Instead of (ab)using macros, we rely
194 * on the compiler to unfold this at compile-time.
196 static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
198 int64_t tf = get_clock() + duration * 1000000000LL;
200 while (get_clock() < tf) {
201 union fp ops[MAX_OPERANDS];
202 int64_t t0;
203 int i;
205 update_random_ops(n_ops, prec);
206 switch (prec) {
207 case PREC_SINGLE:
208 fill_random(ops, n_ops, prec, no_neg);
209 t0 = get_clock();
210 for (i = 0; i < OPS_PER_ITER; i++) {
211 float a = ops[0].f;
212 float b = ops[1].f;
213 float c = ops[2].f;
215 switch (op) {
216 case OP_ADD:
217 res.f = a + b;
218 break;
219 case OP_SUB:
220 res.f = a - b;
221 break;
222 case OP_MUL:
223 res.f = a * b;
224 break;
225 case OP_DIV:
226 res.f = a / b;
227 break;
228 case OP_FMA:
229 res.f = fmaf(a, b, c);
230 break;
231 case OP_SQRT:
232 res.f = sqrtf(a);
233 break;
234 case OP_CMP:
235 res.u64 = isgreater(a, b);
236 break;
237 default:
238 g_assert_not_reached();
241 break;
242 case PREC_DOUBLE:
243 fill_random(ops, n_ops, prec, no_neg);
244 t0 = get_clock();
245 for (i = 0; i < OPS_PER_ITER; i++) {
246 double a = ops[0].d;
247 double b = ops[1].d;
248 double c = ops[2].d;
250 switch (op) {
251 case OP_ADD:
252 res.d = a + b;
253 break;
254 case OP_SUB:
255 res.d = a - b;
256 break;
257 case OP_MUL:
258 res.d = a * b;
259 break;
260 case OP_DIV:
261 res.d = a / b;
262 break;
263 case OP_FMA:
264 res.d = fma(a, b, c);
265 break;
266 case OP_SQRT:
267 res.d = sqrt(a);
268 break;
269 case OP_CMP:
270 res.u64 = isgreater(a, b);
271 break;
272 default:
273 g_assert_not_reached();
276 break;
277 case PREC_FLOAT32:
278 fill_random(ops, n_ops, prec, no_neg);
279 t0 = get_clock();
280 for (i = 0; i < OPS_PER_ITER; i++) {
281 float32 a = ops[0].f32;
282 float32 b = ops[1].f32;
283 float32 c = ops[2].f32;
285 switch (op) {
286 case OP_ADD:
287 res.f32 = float32_add(a, b, &soft_status);
288 break;
289 case OP_SUB:
290 res.f32 = float32_sub(a, b, &soft_status);
291 break;
292 case OP_MUL:
293 res.f = float32_mul(a, b, &soft_status);
294 break;
295 case OP_DIV:
296 res.f32 = float32_div(a, b, &soft_status);
297 break;
298 case OP_FMA:
299 res.f32 = float32_muladd(a, b, c, 0, &soft_status);
300 break;
301 case OP_SQRT:
302 res.f32 = float32_sqrt(a, &soft_status);
303 break;
304 case OP_CMP:
305 res.u64 = float32_compare_quiet(a, b, &soft_status);
306 break;
307 default:
308 g_assert_not_reached();
311 break;
312 case PREC_FLOAT64:
313 fill_random(ops, n_ops, prec, no_neg);
314 t0 = get_clock();
315 for (i = 0; i < OPS_PER_ITER; i++) {
316 float64 a = ops[0].f64;
317 float64 b = ops[1].f64;
318 float64 c = ops[2].f64;
320 switch (op) {
321 case OP_ADD:
322 res.f64 = float64_add(a, b, &soft_status);
323 break;
324 case OP_SUB:
325 res.f64 = float64_sub(a, b, &soft_status);
326 break;
327 case OP_MUL:
328 res.f = float64_mul(a, b, &soft_status);
329 break;
330 case OP_DIV:
331 res.f64 = float64_div(a, b, &soft_status);
332 break;
333 case OP_FMA:
334 res.f64 = float64_muladd(a, b, c, 0, &soft_status);
335 break;
336 case OP_SQRT:
337 res.f64 = float64_sqrt(a, &soft_status);
338 break;
339 case OP_CMP:
340 res.u64 = float64_compare_quiet(a, b, &soft_status);
341 break;
342 default:
343 g_assert_not_reached();
346 break;
347 default:
348 g_assert_not_reached();
350 ns_elapsed += get_clock() - t0;
351 n_completed_ops += OPS_PER_ITER;
355 #define GEN_BENCH(name, type, prec, op, n_ops) \
356 static void __attribute__((flatten)) name(void) \
358 bench(prec, op, n_ops, false); \
361 #define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops) \
362 static void __attribute__((flatten)) name(void) \
364 bench(prec, op, n_ops, true); \
367 #define GEN_BENCH_ALL_TYPES(opname, op, n_ops) \
368 GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
369 GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
370 GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
371 GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)
373 GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
374 GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
375 GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
376 GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
377 GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
378 GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
379 #undef GEN_BENCH_ALL_TYPES
381 #define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n) \
382 GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
383 GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
384 GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
385 GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)
387 GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
388 #undef GEN_BENCH_ALL_TYPES_NO_NEG
390 #undef GEN_BENCH_NO_NEG
391 #undef GEN_BENCH
393 #define GEN_BENCH_FUNCS(opname, op) \
394 [op] = { \
395 [PREC_SINGLE] = bench_ ## opname ## _float, \
396 [PREC_DOUBLE] = bench_ ## opname ## _double, \
397 [PREC_FLOAT32] = bench_ ## opname ## _float32, \
398 [PREC_FLOAT64] = bench_ ## opname ## _float64, \
401 static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
402 GEN_BENCH_FUNCS(add, OP_ADD),
403 GEN_BENCH_FUNCS(sub, OP_SUB),
404 GEN_BENCH_FUNCS(mul, OP_MUL),
405 GEN_BENCH_FUNCS(div, OP_DIV),
406 GEN_BENCH_FUNCS(fma, OP_FMA),
407 GEN_BENCH_FUNCS(sqrt, OP_SQRT),
408 GEN_BENCH_FUNCS(cmp, OP_CMP),
411 #undef GEN_BENCH_FUNCS
413 static void run_bench(void)
415 bench_func_t f;
417 f = bench_funcs[operation][precision];
418 g_assert(f);
419 f();
/*
 * Look up @name in @arr and return the index of the first exact match,
 * or -1 if there is none.
 *
 * @arr must be NULL-terminated.
 */
static int find_name(const char * const *arr, const char *name)
{
    const char * const *entry;

    for (entry = arr; *entry; entry++) {
        if (!strcmp(*entry, name)) {
            return (int)(entry - arr);
        }
    }
    return -1;
}
435 static void usage_complete(int argc, char *argv[])
437 gchar *op_list = g_strjoinv(", ", (gchar **)op_names);
438 gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names);
440 fprintf(stderr, "Usage: %s [options]\n", argv[0]);
441 fprintf(stderr, "options:\n");
442 fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
443 DEFAULT_DURATION_SECS);
444 fprintf(stderr, " -h = show this help message.\n");
445 fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
446 op_list, op_names[0]);
447 fprintf(stderr, " -p = floating point precision (single, double). "
448 "Default: single\n");
449 fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
450 "Default: even\n");
451 fprintf(stderr, " -t = tester (%s). Default: %s\n",
452 tester_list, tester_names[0]);
453 fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
454 "Default: disabled\n");
455 fprintf(stderr, " -Z = flush output to zero (soft tester only). "
456 "Default: disabled\n");
458 g_free(tester_list);
459 g_free(op_list);
462 static int round_name_to_mode(const char *name)
464 int i;
466 for (i = 0; i < N_ROUND_MODES; i++) {
467 if (!strcmp(round_names[i], name)) {
468 return i;
471 return -1;
474 static void QEMU_NORETURN die_host_rounding(enum rounding rounding)
476 fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
477 round_names[rounding]);
478 exit(EXIT_FAILURE);
481 static void set_host_precision(enum rounding rounding)
483 int rhost;
485 switch (rounding) {
486 case ROUND_EVEN:
487 rhost = FE_TONEAREST;
488 break;
489 case ROUND_ZERO:
490 rhost = FE_TOWARDZERO;
491 break;
492 case ROUND_DOWN:
493 rhost = FE_DOWNWARD;
494 break;
495 case ROUND_UP:
496 rhost = FE_UPWARD;
497 break;
498 case ROUND_TIEAWAY:
499 die_host_rounding(rounding);
500 return;
501 default:
502 g_assert_not_reached();
505 if (fesetround(rhost)) {
506 die_host_rounding(rounding);
510 static void set_soft_precision(enum rounding rounding)
512 signed char mode;
514 switch (rounding) {
515 case ROUND_EVEN:
516 mode = float_round_nearest_even;
517 break;
518 case ROUND_ZERO:
519 mode = float_round_to_zero;
520 break;
521 case ROUND_DOWN:
522 mode = float_round_down;
523 break;
524 case ROUND_UP:
525 mode = float_round_up;
526 break;
527 case ROUND_TIEAWAY:
528 mode = float_round_ties_away;
529 break;
530 default:
531 g_assert_not_reached();
533 soft_status.float_rounding_mode = mode;
536 static void parse_args(int argc, char *argv[])
538 int c;
539 int val;
540 int rounding = ROUND_EVEN;
542 for (;;) {
543 c = getopt(argc, argv, "d:ho:p:r:t:zZ");
544 if (c < 0) {
545 break;
547 switch (c) {
548 case 'd':
549 duration = atoi(optarg);
550 break;
551 case 'h':
552 usage_complete(argc, argv);
553 exit(EXIT_SUCCESS);
554 case 'o':
555 val = find_name(op_names, optarg);
556 if (val < 0) {
557 fprintf(stderr, "Unsupported op '%s'\n", optarg);
558 exit(EXIT_FAILURE);
560 operation = val;
561 break;
562 case 'p':
563 if (!strcmp(optarg, "single")) {
564 precision = PREC_SINGLE;
565 } else if (!strcmp(optarg, "double")) {
566 precision = PREC_DOUBLE;
567 } else {
568 fprintf(stderr, "Unsupported precision '%s'\n", optarg);
569 exit(EXIT_FAILURE);
571 break;
572 case 'r':
573 rounding = round_name_to_mode(optarg);
574 if (rounding < 0) {
575 fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
576 exit(EXIT_FAILURE);
578 break;
579 case 't':
580 val = find_name(tester_names, optarg);
581 if (val < 0) {
582 fprintf(stderr, "Unsupported tester '%s'\n", optarg);
583 exit(EXIT_FAILURE);
585 tester = val;
586 break;
587 case 'z':
588 soft_status.flush_inputs_to_zero = 1;
589 break;
590 case 'Z':
591 soft_status.flush_to_zero = 1;
592 break;
596 /* set precision and rounding mode based on the tester */
597 switch (tester) {
598 case TESTER_HOST:
599 set_host_precision(rounding);
600 break;
601 case TESTER_SOFT:
602 set_soft_precision(rounding);
603 switch (precision) {
604 case PREC_SINGLE:
605 precision = PREC_FLOAT32;
606 break;
607 case PREC_DOUBLE:
608 precision = PREC_FLOAT64;
609 break;
610 default:
611 g_assert_not_reached();
613 break;
614 default:
615 g_assert_not_reached();
619 static void pr_stats(void)
621 printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
/* Entry point: configure from the command line, run, report. */
int main(int argc, char *argv[])
{
    parse_args(argc, argv);

    run_bench();
    pr_stats();

    return EXIT_SUCCESS;
}