2 * fp-bench.c - A collection of simple floating point microbenchmarks.
4 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
6 * License: GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
10 #error Must define HW_POISON_H to work around TARGET_* poisoning
13 #include "qemu/osdep.h"
16 #include "qemu/timer.h"
17 #include "qemu/int128.h"
18 #include "fpu/softfloat.h"
/*
 * Compile-time knobs for the benchmark.
 *
 * OPS_PER_ITER is the trip count of every timed inner loop in bench() and
 * the amount added to n_completed_ops after each pass. MAX_OPERANDS sizes
 * the operand arrays (random_ops[], random_quad_ops[] and the local ops[]
 * in bench()). SEED_A/B/C are the initial contents of random_ops[] and, in
 * pairs, of random_quad_ops[].
 */
20 /* amortize the computation of random inputs */
21 #define OPS_PER_ITER 50000
23 #define MAX_OPERANDS 3
25 #define SEED_A 0xdeadfacedeadface
26 #define SEED_B 0xbadc0feebadc0fee
27 #define SEED_C 0xbeefdeadbeefdead
/*
 * Table of operation names. parse_args() looks a requested operation up
 * here through find_name(), and usage_complete() joins the table for the
 * -o help line and prints op_names[0] as the default.
 * find_name() requires the table to be NULL-terminated.
 * NOTE(review): the initialiser entries are not part of this listing.
 */
40 static const char * const op_names
[] = {
/*
 * Rounding-mode names, indexed by the ROUND_* constants.
 * round_name_to_mode() compares a user-supplied name against entries
 * 0..N_ROUND_MODES-1, and die_host_rounding() prints the entry for the mode
 * it rejects.
 * NOTE(review): usage_complete() advertises an "up" mode and
 * set_soft_precision() has a float_round_up arm, but no matching entry is
 * visible in this listing -- confirm the table has one, since
 * round_name_to_mode() passes every entry to strcmp().
 */
70 static const char * const round_names
[] = {
71 [ROUND_EVEN
] = "even",
72 [ROUND_ZERO
] = "zero",
73 [ROUND_DOWN
] = "down",
75 [ROUND_TIEAWAY
] = "tieaway",
/*
 * Tester names, indexed by the TESTER_* constants. The explicit NULL stored
 * at TESTER_MAX_NR terminates the table; find_name() stops at that NULL and
 * usage_complete() hands the table to g_strjoinv().
 */
84 static const char * const tester_names
[] = {
85 [TESTER_SOFT
] = "soft",
86 [TESTER_HOST
] = "host",
87 [TESTER_MAX_NR
] = NULL
,
/*
 * Function-pointer types used by the benchmark.
 *
 * float_func_t and double_func_t take a const struct op_state * and return
 * a host float / double respectively. bench_func_t is the no-argument,
 * no-result signature of the generated bench_* functions stored in
 * bench_funcs[][].
 *
 * NOTE(review): the float_func / double_func and name members below belong
 * to aggregate declarations whose opening lines are not part of this
 * listing; their enclosing types cannot be identified from here.
 */
101 typedef float (*float_func_t
)(const struct op_state
*s
);
102 typedef double (*double_func_t
)(const struct op_state
*s
);
105 float_func_t float_func
;
106 double_func_t double_func
;
109 typedef void (*bench_func_t
)(void);
112 const char * const name
;
/*
 * File-scope benchmark state.
 *
 * DEFAULT_DURATION_SECS is the initial value of 'duration' and the default
 * printed by usage_complete().
 */
115 #define DEFAULT_DURATION_SECS 1
/*
 * Current pseudo-random operand bit patterns for the 32/64-bit cases,
 * seeded from SEED_A/B/C. Read by update_random_ops() and fill_random().
 */
117 static uint64_t random_ops
[MAX_OPERANDS
] = {
118 SEED_A
, SEED_B
, SEED_C
,
/*
 * Same idea for 128-bit operands; each element is initialised from a pair
 * of seeds. update_random_ops() rewrites these in place.
 */
121 static float128 random_quad_ops
[MAX_OPERANDS
] = {
122 {SEED_A
, SEED_B
}, {SEED_B
, SEED_C
}, {SEED_C
, SEED_A
},
/*
 * soft_status is the float_status passed to every softfloat call in
 * bench(); parse_args() sets its flush flags and set_soft_precision() its
 * rounding mode. 'operation' and 'precision' select the bench_funcs[][]
 * entry in run_bench(). 'duration' (seconds) bounds the run in bench().
 * n_completed_ops and ns_elapsed are accumulated by bench() and reported
 * by pr_stats().
 * NOTE(review): the assignments to 'operation' and 'tester' are not
 * visible in this listing -- presumably made in parse_args().
 */
124 static float_status soft_status
;
125 static enum precision precision
;
126 static enum op operation
;
127 static enum tester tester
;
128 static uint64_t n_completed_ops
;
129 static unsigned int duration
= DEFAULT_DURATION_SECS
;
130 static int64_t ns_elapsed
;
131 /* disable optimizations with volatile */
132 static volatile union fp res
;
/*
 * From: https://en.wikipedia.org/wiki/Xorshift
 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
 * guaranteed to be >= INT_MAX).
 *
 * Advance a 64-bit xorshift* state by one step.
 *
 * @x: current state
 *
 * Returns the next value: @x is mixed with three xor-shift steps (right 12,
 * left 25, right 27) and the result is multiplied by a fixed odd constant,
 * all in unsigned 64-bit arithmetic (i.e. modulo 2^64). Callers feed the
 * returned value back in as the next state. A state of 0 maps to 0, so the
 * seeds must be non-zero.
 */
static uint64_t xorshift64star(uint64_t x)
{
    x ^= x >> 12; /* a */
    x ^= x << 25; /* b */
    x ^= x >> 27; /* c */
    return x * UINT64_C(2685821657736338717);
}
/*
 * Advance the pseudo-random operands used by the next benchmark pass.
 *
 * @n_ops: number of operands to refresh (loop runs for i in [0, n_ops))
 * @prec:  precision whose operand pool is refreshed
 *
 * For each operand the current bit pattern is stepped through
 * xorshift64star() repeatedly until the candidate passes the matching
 * float32/float64/float128 "is normal" test. The 128-bit case steps the
 * high and low 64-bit halves separately, recombines them with
 * make_float128() and stores the result back into random_quad_ops[i].
 * An unhandled @prec ends in g_assert_not_reached().
 *
 * NOTE(review): the switch/case scaffolding, the declaration of 'lo' and
 * the write-back of 'r' into random_ops[i] for the 32/64-bit arms are not
 * part of this listing.
 */
147 static void update_random_ops(int n_ops
, enum precision prec
)
151 for (i
= 0; i
< n_ops
; i
++) {
/* 32-bit arm: start from the stored pattern and re-roll until normal. */
157 uint64_t r
= random_ops
[i
];
159 r
= xorshift64star(r
);
160 } while (!float32_is_normal(r
));
/* 64-bit arm: same scheme, checked with the float64 predicate. */
167 uint64_t r
= random_ops
[i
];
169 r
= xorshift64star(r
);
170 } while (!float64_is_normal(r
));
/* 128-bit arm: both halves are advanced independently on every retry. */
177 float128 r
= random_quad_ops
[i
];
178 uint64_t hi
= r
.high
;
181 hi
= xorshift64star(hi
);
182 lo
= xorshift64star(lo
);
183 r
= make_float128(hi
, lo
);
184 } while (!float128_is_normal(r
));
185 random_quad_ops
[i
] = r
;
189 g_assert_not_reached();
/*
 * Load the operand array for one benchmark pass from the current random
 * bit patterns.
 *
 * @ops:    destination array; elements [0, n_ops) are written
 * @n_ops:  number of operands to fill
 * @prec:   selects which union member is written (f32, f64 or f128)
 * no_neg:  when true, an operand that tests negative is replaced by its
 *          sign-flipped value (*_chs), so only non-negative inputs remain
 *
 * 32/64-bit operands are built from random_ops[i] with make_float32() /
 * make_float64(); 128-bit operands are copied from random_quad_ops[i].
 * An unhandled @prec ends in g_assert_not_reached().
 *
 * NOTE(review): the tail of the parameter list (where no_neg is declared)
 * and the switch/case scaffolding are not part of this listing.
 */
194 static void fill_random(union fp
*ops
, int n_ops
, enum precision prec
,
199 for (i
= 0; i
< n_ops
; i
++) {
/* 32-bit operands */
203 ops
[i
].f32
= make_float32(random_ops
[i
]);
204 if (no_neg
&& float32_is_neg(ops
[i
].f32
)) {
205 ops
[i
].f32
= float32_chs(ops
[i
].f32
);
/* 64-bit operands */
210 ops
[i
].f64
= make_float64(random_ops
[i
]);
211 if (no_neg
&& float64_is_neg(ops
[i
].f64
)) {
212 ops
[i
].f64
= float64_chs(ops
[i
].f64
);
/* 128-bit operands */
217 ops
[i
].f128
= random_quad_ops
[i
];
218 if (no_neg
&& float128_is_neg(ops
[i
].f128
)) {
219 ops
[i
].f128
= float128_chs(ops
[i
].f128
);
223 g_assert_not_reached();
229 * The main benchmark function. Instead of (ab)using macros, we rely
230 * on the compiler to unfold this at compile-time.
/*
 * Timed benchmark loop for one (prec, op) combination.
 *
 * @prec:   precision / implementation to exercise
 * @op:     operation to time
 * @n_ops:  number of operands the operation consumes; the local ops[]
 *          array holds MAX_OPERANDS elements
 * @no_neg: forwarded to fill_random(); when true, negative operands are
 *          sign-flipped before use
 *
 * Passes are repeated until get_clock() reaches a deadline computed from
 * the global 'duration' (seconds, scaled to nanoseconds). Each pass
 * refreshes the random operands (update_random_ops() then fill_random()),
 * runs OPS_PER_ITER operations, then adds the pass's elapsed time to
 * ns_elapsed and OPS_PER_ITER to n_completed_ops. Every result is stored in
 * the volatile 'res' so the computation cannot be optimised away.
 *
 * Soft-float results are stored through the res member of the matching
 * softfloat type (f32 / f64 / f128). The float32 and float64 multiply arms
 * previously stored through res.f -- the member the host arm uses for
 * fmaf() results -- unlike every sibling arm; they now use res.f32 and
 * res.f64 so the multiply arms do the same store as add/sub/div.
 *
 * NOTE(review): the switch/case scaffolding, the host arithmetic arms, the
 * declarations of i/t0/a/b/c in the host arms and the t0 = get_clock()
 * assignments are not part of this listing.
 */
232 static void bench(enum precision prec
, enum op op
, int n_ops
, bool no_neg
)
/* Absolute deadline: now + duration seconds, in nanoseconds. */
234 int64_t tf
= get_clock() + duration
* 1000000000LL;
236 while (get_clock() < tf
) {
237 union fp ops
[MAX_OPERANDS
];
241 update_random_ops(n_ops
, prec
);
/* Host arm using fmaf() -- presumably single precision. */
244 fill_random(ops
, n_ops
, prec
, no_neg
);
246 for (i
= 0; i
< OPS_PER_ITER
; i
++) {
265 res
.f
= fmaf(a
, b
, c
);
271 res
.u64
= isgreater(a
, b
);
274 g_assert_not_reached();
/* Host arm using fma() -- presumably double precision. */
279 fill_random(ops
, n_ops
, prec
, no_neg
);
281 for (i
= 0; i
< OPS_PER_ITER
; i
++) {
300 res
.d
= fma(a
, b
, c
);
306 res
.u64
= isgreater(a
, b
);
309 g_assert_not_reached();
/* Softfloat arm, float32 operands. */
314 fill_random(ops
, n_ops
, prec
, no_neg
);
316 for (i
= 0; i
< OPS_PER_ITER
; i
++) {
317 float32 a
= ops
[0].f32
;
318 float32 b
= ops
[1].f32
;
319 float32 c
= ops
[2].f32
;
323 res
.f32
= float32_add(a
, b
, &soft_status
);
326 res
.f32
= float32_sub(a
, b
, &soft_status
);
/* Store through f32 like the other float32 arms (was res.f). */
329 res
.f32
= float32_mul(a
, b
, &soft_status
);
332 res
.f32
= float32_div(a
, b
, &soft_status
);
335 res
.f32
= float32_muladd(a
, b
, c
, 0, &soft_status
);
338 res
.f32
= float32_sqrt(a
, &soft_status
);
341 res
.u64
= float32_compare_quiet(a
, b
, &soft_status
);
344 g_assert_not_reached();
/* Softfloat arm, float64 operands. */
349 fill_random(ops
, n_ops
, prec
, no_neg
);
351 for (i
= 0; i
< OPS_PER_ITER
; i
++) {
352 float64 a
= ops
[0].f64
;
353 float64 b
= ops
[1].f64
;
354 float64 c
= ops
[2].f64
;
358 res
.f64
= float64_add(a
, b
, &soft_status
);
361 res
.f64
= float64_sub(a
, b
, &soft_status
);
/* Store through f64 like the other float64 arms (was res.f). */
364 res
.f64
= float64_mul(a
, b
, &soft_status
);
367 res
.f64
= float64_div(a
, b
, &soft_status
);
370 res
.f64
= float64_muladd(a
, b
, c
, 0, &soft_status
);
373 res
.f64
= float64_sqrt(a
, &soft_status
);
376 res
.u64
= float64_compare_quiet(a
, b
, &soft_status
);
379 g_assert_not_reached();
/* Softfloat arm, float128 operands. */
384 fill_random(ops
, n_ops
, prec
, no_neg
);
386 for (i
= 0; i
< OPS_PER_ITER
; i
++) {
387 float128 a
= ops
[0].f128
;
388 float128 b
= ops
[1].f128
;
389 float128 c
= ops
[2].f128
;
393 res
.f128
= float128_add(a
, b
, &soft_status
);
396 res
.f128
= float128_sub(a
, b
, &soft_status
);
399 res
.f128
= float128_mul(a
, b
, &soft_status
);
402 res
.f128
= float128_div(a
, b
, &soft_status
);
405 res
.f128
= float128_muladd(a
, b
, c
, 0, &soft_status
);
408 res
.f128
= float128_sqrt(a
, &soft_status
);
411 res
.u64
= float128_compare_quiet(a
, b
, &soft_status
);
414 g_assert_not_reached();
419 g_assert_not_reached();
/* Account for the pass that just finished. */
421 ns_elapsed
+= get_clock() - t0
;
422 n_completed_ops
+= OPS_PER_ITER
;
/*
 * Generators for the per-(operation, precision) entry points.
 *
 * GEN_BENCH(name, type, prec, op, n_ops) defines a static function
 * name(void), marked __attribute__((flatten)), whose body calls
 * bench(prec, op, n_ops, false). GEN_BENCH_NO_NEG is identical except that
 * it passes true for no_neg. The 'type' parameter does not appear in the
 * expansion lines visible here.
 *
 * GEN_BENCH_ALL_TYPES / GEN_BENCH_ALL_TYPES_NO_NEG instantiate one such
 * function per precision (PREC_SINGLE, PREC_DOUBLE, PREC_FLOAT32,
 * PREC_FLOAT64, PREC_FLOAT128), named bench_<op>_<type>. The last argument
 * of each instantiation is the operand count: 2 for add/sub/mul/div/cmp,
 * 3 for fma and 1 for sqrt. sqrt is the only operation generated with the
 * NO_NEG variant.
 *
 * NOTE(review): the brace lines of the generated function bodies and the
 * #undef of GEN_BENCH are not part of this listing.
 */
426 #define GEN_BENCH(name, type, prec, op, n_ops) \
427 static void __attribute__((flatten)) name(void) \
429 bench(prec, op, n_ops, false); \
432 #define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops) \
433 static void __attribute__((flatten)) name(void) \
435 bench(prec, op, n_ops, true); \
438 #define GEN_BENCH_ALL_TYPES(opname, op, n_ops) \
439 GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
440 GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
441 GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
442 GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \
443 GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops)
/* Operations that accept operands of either sign. */
445 GEN_BENCH_ALL_TYPES(add
, OP_ADD
, 2)
446 GEN_BENCH_ALL_TYPES(sub
, OP_SUB
, 2)
447 GEN_BENCH_ALL_TYPES(mul
, OP_MUL
, 2)
448 GEN_BENCH_ALL_TYPES(div
, OP_DIV
, 2)
449 GEN_BENCH_ALL_TYPES(fma
, OP_FMA
, 3)
450 GEN_BENCH_ALL_TYPES(cmp
, OP_CMP
, 2)
451 #undef GEN_BENCH_ALL_TYPES
453 #define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n) \
454 GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
455 GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
456 GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
457 GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \
458 GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)
/* Square root is benchmarked with negative inputs sign-flipped. */
460 GEN_BENCH_ALL_TYPES_NO_NEG(sqrt
, OP_SQRT
, 1)
461 #undef GEN_BENCH_ALL_TYPES_NO_NEG
463 #undef GEN_BENCH_NO_NEG
/*
 * Dispatch table from (operation, precision) to the generated bench_*
 * function; run_bench() indexes it as bench_funcs[operation][precision].
 *
 * GEN_BENCH_FUNCS(opname, op) expands to the row for one operation, using
 * designated initialisers keyed by PREC_* that name the corresponding
 * bench_<opname>_<type> function.
 * NOTE(review): the lines that open and close each row (where the 'op'
 * argument is presumably used as the row designator) and the table's
 * closing brace are not part of this listing.
 */
466 #define GEN_BENCH_FUNCS(opname, op) \
468 [PREC_SINGLE] = bench_ ## opname ## _float, \
469 [PREC_DOUBLE] = bench_ ## opname ## _double, \
470 [PREC_FLOAT32] = bench_ ## opname ## _float32, \
471 [PREC_FLOAT64] = bench_ ## opname ## _float64, \
472 [PREC_FLOAT128] = bench_ ## opname ## _float128, \
475 static const bench_func_t bench_funcs
[OP_MAX_NR
][PREC_MAX_NR
] = {
476 GEN_BENCH_FUNCS(add
, OP_ADD
),
477 GEN_BENCH_FUNCS(sub
, OP_SUB
),
478 GEN_BENCH_FUNCS(mul
, OP_MUL
),
479 GEN_BENCH_FUNCS(div
, OP_DIV
),
480 GEN_BENCH_FUNCS(fma
, OP_FMA
),
481 GEN_BENCH_FUNCS(sqrt
, OP_SQRT
),
482 GEN_BENCH_FUNCS(cmp
, OP_CMP
),
485 #undef GEN_BENCH_FUNCS
/*
 * Select the benchmark function for the configured 'operation' and
 * 'precision' from bench_funcs[][].
 * NOTE(review): the declaration of 'f' and whatever is done with it after
 * the lookup (presumably a call) are not part of this listing.
 */
487 static void run_bench(void)
491 f
= bench_funcs
[operation
][precision
];
/*
 * Search a table of strings for an exact match of @name.
 *
 * @arr:  table to scan; scanning stops at the first NULL entry
 * @name: string to look for, compared with strcmp()
 *
 * parse_args() stores the result in 'val' for both the operation and the
 * tester lookup.
 * NOTE(review): the return statements are not part of this listing;
 * presumably the matching index is returned, with a distinct value when
 * nothing matches -- confirm against the callers' checks.
 */
496 /* @arr must be NULL-terminated */
497 static int find_name(const char * const *arr
, const char *name
)
501 for (i
= 0; arr
[i
] != NULL
; i
++) {
502 if (strcmp(name
, arr
[i
]) == 0) {
/*
 * Print the full help text to stderr.
 *
 * @argc: unused in the lines visible here
 * @argv: argv[0] is printed as the program name
 *
 * The lists of valid operations and testers are built by joining op_names
 * and tester_names with ", "; the first entry of each table is shown as
 * the default.
 * NOTE(review): g_strjoinv() returns newly allocated strings and no
 * g_free() of op_list / tester_list is visible in this listing -- confirm
 * they are released, or that the function does not return.
 * NOTE(review): the continuation of the -r line (its default) is not part
 * of this listing.
 */
509 static void usage_complete(int argc
, char *argv
[])
/* The casts drop the const qualifiers of the name tables for g_strjoinv(). */
511 gchar
*op_list
= g_strjoinv(", ", (gchar
**)op_names
);
512 gchar
*tester_list
= g_strjoinv(", ", (gchar
**)tester_names
);
514 fprintf(stderr
, "Usage: %s [options]\n", argv
[0]);
515 fprintf(stderr
, "options:\n");
516 fprintf(stderr
, " -d = duration, in seconds. Default: %d\n",
517 DEFAULT_DURATION_SECS
);
518 fprintf(stderr
, " -h = show this help message.\n");
519 fprintf(stderr
, " -o = floating point operation (%s). Default: %s\n",
520 op_list
, op_names
[0]);
521 fprintf(stderr
, " -p = floating point precision (single, double, quad[soft only]). "
522 "Default: single\n");
523 fprintf(stderr
, " -r = rounding mode (even, zero, down, up, tieaway). "
525 fprintf(stderr
, " -t = tester (%s). Default: %s\n",
526 tester_list
, tester_names
[0]);
527 fprintf(stderr
, " -z = flush inputs to zero (soft tester only). "
528 "Default: disabled\n");
529 fprintf(stderr
, " -Z = flush output to zero (soft tester only). "
530 "Default: disabled\n");
/*
 * Translate a rounding-mode name into its position in round_names[].
 *
 * @name: user-supplied mode name, compared with strcmp() against entries
 *        0..N_ROUND_MODES-1 of round_names[]
 *
 * Every entry in that range is passed to strcmp(), so each must be a valid
 * string.
 * NOTE(review): the return statements are not part of this listing;
 * presumably the matching index is returned, with a distinct value for
 * "not found" that parse_args() reports as an invalid rounding mode.
 */
536 static int round_name_to_mode(const char *name
)
540 for (i
= 0; i
< N_ROUND_MODES
; i
++) {
541 if (!strcmp(round_names
[i
], name
)) {
/*
 * Report on stderr that the host cannot provide the requested rounding
 * mode, naming the mode via round_names[rounding].
 *
 * @rounding: the rejected mode; used directly as an index into round_names
 *
 * NOTE(review): the qualifiers preceding 'void' and whatever follows the
 * message (the name suggests the process is terminated) are not part of
 * this listing.
 */
549 void die_host_rounding(enum rounding rounding
)
551 fprintf(stderr
, "fatal: '%s' rounding not supported on this host\n",
552 round_names
[rounding
]);
/*
 * Configure the host FPU rounding mode for the host tester.
 *
 * @rounding: requested mode
 *
 * The mode is mapped to a <fenv.h> FE_* constant and installed with
 * fesetround(); a non-zero return from fesetround() is reported through
 * die_host_rounding(). One arm calls die_host_rounding() without
 * attempting fesetround(), and an unhandled value ends in
 * g_assert_not_reached(). Despite the name, only the rounding mode is set
 * in the lines visible here.
 *
 * NOTE(review): the case labels, the declaration of 'rhost' and some of
 * the mode mappings are not part of this listing, so which modes are
 * rejected up front cannot be confirmed from here.
 */
556 static void set_host_precision(enum rounding rounding
)
562 rhost
= FE_TONEAREST
;
565 rhost
= FE_TOWARDZERO
;
574 die_host_rounding(rounding
);
577 g_assert_not_reached();
580 if (fesetround(rhost
)) {
581 die_host_rounding(rounding
);
/*
 * Configure the softfloat rounding mode for the soft tester.
 *
 * @rounding: requested mode
 *
 * The mode is mapped to one of the softfloat float_round_* constants
 * (nearest-even, to-zero, down, up, ties-away) and stored in
 * soft_status.float_rounding_mode, the status block handed to every
 * softfloat call in bench(). An unhandled value ends in
 * g_assert_not_reached(). Despite the name, only the rounding mode is set
 * in the lines visible here.
 *
 * NOTE(review): the case labels and the declaration of 'mode' are not part
 * of this listing.
 */
585 static void set_soft_precision(enum rounding rounding
)
591 mode
= float_round_nearest_even
;
594 mode
= float_round_to_zero
;
597 mode
= float_round_down
;
600 mode
= float_round_up
;
603 mode
= float_round_ties_away
;
606 g_assert_not_reached();
608 soft_status
.float_rounding_mode
= mode
;
/*
 * Parse the command line and configure the benchmark globals.
 *
 * @argc, @argv: as received by main(); scanned with getopt() using the
 *               option string "d:ho:p:r:t:zZ"
 *
 * Visible effects: 'duration' is taken from an option argument via atoi();
 * the help text is printed through usage_complete(); operation and tester
 * names are looked up with find_name(); 'precision' is set from
 * "single" / "double" / "quad"; the rounding mode (default ROUND_EVEN) is
 * resolved with round_name_to_mode(); and the two flush flags of
 * soft_status are set to 1. Unsupported values are reported on stderr.
 *
 * After option parsing, the rounding mode is applied with
 * set_host_precision() or set_soft_precision(), and 'precision' is
 * rewritten to one of PREC_FLOAT32 / PREC_FLOAT64 / PREC_FLOAT128.
 *
 * NOTE(review): the option loop, case labels, local declarations, result
 * checks and the statements following each error message are not part of
 * this listing; the mapping of option letters to the statements below is
 * inferred from the help text in usage_complete().
 */
611 static void parse_args(int argc
, char *argv
[])
615 int rounding
= ROUND_EVEN
;
618 c
= getopt(argc
, argv
, "d:ho:p:r:t:zZ");
/*
 * NOTE(review): atoi() reports no errors -- non-numeric input yields 0 --
 * and 'duration' is unsigned, so a negative argument is converted to a
 * large value.
 */
624 duration
= atoi(optarg
);
627 usage_complete(argc
, argv
);
630 val
= find_name(op_names
, optarg
);
632 fprintf(stderr
, "Unsupported op '%s'\n", optarg
);
638 if (!strcmp(optarg
, "single")) {
639 precision
= PREC_SINGLE
;
640 } else if (!strcmp(optarg
, "double")) {
641 precision
= PREC_DOUBLE
;
642 } else if (!strcmp(optarg
, "quad")) {
643 precision
= PREC_QUAD
;
645 fprintf(stderr
, "Unsupported precision '%s'\n", optarg
);
650 rounding
= round_name_to_mode(optarg
);
652 fprintf(stderr
, "fatal: invalid rounding mode '%s'\n", optarg
);
657 val
= find_name(tester_names
, optarg
);
659 fprintf(stderr
, "Unsupported tester '%s'\n", optarg
);
665 soft_status
.flush_inputs_to_zero
= 1;
668 soft_status
.flush_to_zero
= 1;
673 /* set precision and rounding mode based on the tester */
676 set_host_precision(rounding
);
679 set_soft_precision(rounding
);
/*
 * Precision remapping to the softfloat PREC_FLOAT* values -- presumably
 * only on the soft tester path; the enclosing switch is not visible.
 */
682 precision
= PREC_FLOAT32
;
685 precision
= PREC_FLOAT64
;
688 precision
= PREC_FLOAT128
;
691 g_assert_not_reached();
695 g_assert_not_reached();
/*
 * Print the measured throughput to stdout as "<value> MFlops".
 *
 * The value is n_completed_ops divided by ns_elapsed (operations per
 * nanosecond, computed in double precision) scaled by 1e3, i.e. millions
 * of operations per second. If ns_elapsed is 0 the division is by zero and
 * the printed value is not a finite number.
 */
699 static void pr_stats(void)
701 printf("%.2f MFlops\n", (double)n_completed_ops
/ ns_elapsed
* 1e3
);
704 int main(int argc
, char *argv
[])
706 parse_args(argc
, argv
);