2 * fp-bench.c - A collection of simple floating point microbenchmarks.
4 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
6 * License: GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
10 #error Must define HW_POISON_H to work around TARGET_* poisoning
13 #include "qemu/osdep.h"
16 #include "qemu/timer.h"
17 #include "fpu/softfloat.h"
/*
 * Number of operations executed per timed batch; a large batch amortizes
 * the computation of random inputs.
 */
#define OPS_PER_ITER     50000

/* Size of the operand arrays; the widest benchmarked operation takes three. */
#define MAX_OPERANDS 3

/* Initial state of the per-operand pseudo-random sequences. */
#define SEED_A 0xdeadfacedeadface
#define SEED_B 0xbadc0feebadc0fee
#define SEED_C 0xbeefdeadbeefdead
39 static const char * const op_names
[] = {
67 static const char * const round_names
[] = {
68 [ROUND_EVEN
] = "even",
69 [ROUND_ZERO
] = "zero",
70 [ROUND_DOWN
] = "down",
72 [ROUND_TIEAWAY
] = "tieaway",
81 static const char * const tester_names
[] = {
82 [TESTER_SOFT
] = "soft",
83 [TESTER_HOST
] = "host",
84 [TESTER_MAX_NR
] = NULL
,
/* Pointer to a function computing a host float from an operation state. */
typedef float (*float_func_t)(const struct op_state *s);

/* Pointer to a function computing a host double from an operation state. */
typedef double (*double_func_t)(const struct op_state *s);
101 float_func_t float_func
;
102 double_func_t double_func
;
/* Entry point of one benchmark: takes no arguments and returns nothing. */
typedef void (*bench_func_t)(void);
108 const char * const name
;
111 #define DEFAULT_DURATION_SECS 1
113 static uint64_t random_ops
[MAX_OPERANDS
] = {
114 SEED_A
, SEED_B
, SEED_C
,
116 static float_status soft_status
;
117 static enum precision precision
;
118 static enum op operation
;
119 static enum tester tester
;
120 static uint64_t n_completed_ops
;
121 static unsigned int duration
= DEFAULT_DURATION_SECS
;
122 static int64_t ns_elapsed
;
123 /* disable optimizations with volatile */
124 static volatile union fp res
;
/*
 * One step of the xorshift64* generator: returns the scrambled successor
 * of @x. A zero input yields zero, so callers must seed with non-zero values.
 *
 * From: https://en.wikipedia.org/wiki/Xorshift
 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
 * guaranteed to be >= INT_MAX).
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t state = x;

    state = state ^ (state >> 12); /* a */
    state = state ^ (state << 25); /* b */
    state = state ^ (state >> 27); /* c */

    return state * multiplier;
}
139 static void update_random_ops(int n_ops
, enum precision prec
)
143 for (i
= 0; i
< n_ops
; i
++) {
144 uint64_t r
= random_ops
[i
];
146 if (prec
== PREC_SINGLE
|| PREC_FLOAT32
) {
148 r
= xorshift64star(r
);
149 } while (!float32_is_normal(r
));
150 } else if (prec
== PREC_DOUBLE
|| PREC_FLOAT64
) {
152 r
= xorshift64star(r
);
153 } while (!float64_is_normal(r
));
155 g_assert_not_reached();
161 static void fill_random(union fp
*ops
, int n_ops
, enum precision prec
,
166 for (i
= 0; i
< n_ops
; i
++) {
170 ops
[i
].f32
= make_float32(random_ops
[i
]);
171 if (no_neg
&& float32_is_neg(ops
[i
].f32
)) {
172 ops
[i
].f32
= float32_chs(ops
[i
].f32
);
174 /* raise the exponent to limit the frequency of denormal results */
175 ops
[i
].f32
|= 0x40000000;
179 ops
[i
].f64
= make_float64(random_ops
[i
]);
180 if (no_neg
&& float64_is_neg(ops
[i
].f64
)) {
181 ops
[i
].f64
= float64_chs(ops
[i
].f64
);
183 /* raise the exponent to limit the frequency of denormal results */
184 ops
[i
].f64
|= LIT64(0x4000000000000000);
187 g_assert_not_reached();
193 * The main benchmark function. Instead of (ab)using macros, we rely
194 * on the compiler to unfold this at compile-time.
196 static void bench(enum precision prec
, enum op op
, int n_ops
, bool no_neg
)
198 int64_t tf
= get_clock() + duration
* 1000000000LL;
200 while (get_clock() < tf
) {
201 union fp ops
[MAX_OPERANDS
];
205 update_random_ops(n_ops
, prec
);
208 fill_random(ops
, n_ops
, prec
, no_neg
);
210 for (i
= 0; i
< OPS_PER_ITER
; i
++) {
229 res
.f
= fmaf(a
, b
, c
);
235 res
.u64
= isgreater(a
, b
);
238 g_assert_not_reached();
243 fill_random(ops
, n_ops
, prec
, no_neg
);
245 for (i
= 0; i
< OPS_PER_ITER
; i
++) {
264 res
.d
= fma(a
, b
, c
);
270 res
.u64
= isgreater(a
, b
);
273 g_assert_not_reached();
278 fill_random(ops
, n_ops
, prec
, no_neg
);
280 for (i
= 0; i
< OPS_PER_ITER
; i
++) {
281 float32 a
= ops
[0].f32
;
282 float32 b
= ops
[1].f32
;
283 float32 c
= ops
[2].f32
;
287 res
.f32
= float32_add(a
, b
, &soft_status
);
290 res
.f32
= float32_sub(a
, b
, &soft_status
);
293 res
.f
= float32_mul(a
, b
, &soft_status
);
296 res
.f32
= float32_div(a
, b
, &soft_status
);
299 res
.f32
= float32_muladd(a
, b
, c
, 0, &soft_status
);
302 res
.f32
= float32_sqrt(a
, &soft_status
);
305 res
.u64
= float32_compare_quiet(a
, b
, &soft_status
);
308 g_assert_not_reached();
313 fill_random(ops
, n_ops
, prec
, no_neg
);
315 for (i
= 0; i
< OPS_PER_ITER
; i
++) {
316 float64 a
= ops
[0].f64
;
317 float64 b
= ops
[1].f64
;
318 float64 c
= ops
[2].f64
;
322 res
.f64
= float64_add(a
, b
, &soft_status
);
325 res
.f64
= float64_sub(a
, b
, &soft_status
);
328 res
.f
= float64_mul(a
, b
, &soft_status
);
331 res
.f64
= float64_div(a
, b
, &soft_status
);
334 res
.f64
= float64_muladd(a
, b
, c
, 0, &soft_status
);
337 res
.f64
= float64_sqrt(a
, &soft_status
);
340 res
.u64
= float64_compare_quiet(a
, b
, &soft_status
);
343 g_assert_not_reached();
348 g_assert_not_reached();
350 ns_elapsed
+= get_clock() - t0
;
351 n_completed_ops
+= OPS_PER_ITER
;
355 #define GEN_BENCH(name, type, prec, op, n_ops) \
356 static void __attribute__((flatten)) name(void) \
358 bench(prec, op, n_ops, false); \
361 #define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops) \
362 static void __attribute__((flatten)) name(void) \
364 bench(prec, op, n_ops, true); \
367 #define GEN_BENCH_ALL_TYPES(opname, op, n_ops) \
368 GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
369 GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
370 GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
371 GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)
373 GEN_BENCH_ALL_TYPES(add
, OP_ADD
, 2)
374 GEN_BENCH_ALL_TYPES(sub
, OP_SUB
, 2)
375 GEN_BENCH_ALL_TYPES(mul
, OP_MUL
, 2)
376 GEN_BENCH_ALL_TYPES(div
, OP_DIV
, 2)
377 GEN_BENCH_ALL_TYPES(fma
, OP_FMA
, 3)
378 GEN_BENCH_ALL_TYPES(cmp
, OP_CMP
, 2)
379 #undef GEN_BENCH_ALL_TYPES
381 #define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n) \
382 GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
383 GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
384 GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
385 GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)
387 GEN_BENCH_ALL_TYPES_NO_NEG(sqrt
, OP_SQRT
, 1)
388 #undef GEN_BENCH_ALL_TYPES_NO_NEG
390 #undef GEN_BENCH_NO_NEG
393 #define GEN_BENCH_FUNCS(opname, op) \
395 [PREC_SINGLE] = bench_ ## opname ## _float, \
396 [PREC_DOUBLE] = bench_ ## opname ## _double, \
397 [PREC_FLOAT32] = bench_ ## opname ## _float32, \
398 [PREC_FLOAT64] = bench_ ## opname ## _float64, \
401 static const bench_func_t bench_funcs
[OP_MAX_NR
][PREC_MAX_NR
] = {
402 GEN_BENCH_FUNCS(add
, OP_ADD
),
403 GEN_BENCH_FUNCS(sub
, OP_SUB
),
404 GEN_BENCH_FUNCS(mul
, OP_MUL
),
405 GEN_BENCH_FUNCS(div
, OP_DIV
),
406 GEN_BENCH_FUNCS(fma
, OP_FMA
),
407 GEN_BENCH_FUNCS(sqrt
, OP_SQRT
),
408 GEN_BENCH_FUNCS(cmp
, OP_CMP
),
411 #undef GEN_BENCH_FUNCS
413 static void run_bench(void)
417 f
= bench_funcs
[operation
][precision
];
/*
 * Return the index of the first entry of @arr equal to @name, or -1 if
 * there is none. @arr must be NULL-terminated.
 */
static int find_name(const char * const *arr, const char *name)
{
    const char * const *entry;

    for (entry = arr; *entry != NULL; entry++) {
        if (strcmp(name, *entry) == 0) {
            return (int)(entry - arr);
        }
    }
    return -1;
}
435 static void usage_complete(int argc
, char *argv
[])
437 gchar
*op_list
= g_strjoinv(", ", (gchar
**)op_names
);
438 gchar
*tester_list
= g_strjoinv(", ", (gchar
**)tester_names
);
440 fprintf(stderr
, "Usage: %s [options]\n", argv
[0]);
441 fprintf(stderr
, "options:\n");
442 fprintf(stderr
, " -d = duration, in seconds. Default: %d\n",
443 DEFAULT_DURATION_SECS
);
444 fprintf(stderr
, " -h = show this help message.\n");
445 fprintf(stderr
, " -o = floating point operation (%s). Default: %s\n",
446 op_list
, op_names
[0]);
447 fprintf(stderr
, " -p = floating point precision (single, double). "
448 "Default: single\n");
449 fprintf(stderr
, " -r = rounding mode (even, zero, down, up, tieaway). "
451 fprintf(stderr
, " -t = tester (%s). Default: %s\n",
452 tester_list
, tester_names
[0]);
453 fprintf(stderr
, " -z = flush inputs to zero (soft tester only). "
454 "Default: disabled\n");
455 fprintf(stderr
, " -Z = flush output to zero (soft tester only). "
456 "Default: disabled\n");
462 static int round_name_to_mode(const char *name
)
466 for (i
= 0; i
< N_ROUND_MODES
; i
++) {
467 if (!strcmp(round_names
[i
], name
)) {
474 static void QEMU_NORETURN
die_host_rounding(enum rounding rounding
)
476 fprintf(stderr
, "fatal: '%s' rounding not supported on this host\n",
477 round_names
[rounding
]);
481 static void set_host_precision(enum rounding rounding
)
487 rhost
= FE_TONEAREST
;
490 rhost
= FE_TOWARDZERO
;
499 die_host_rounding(rounding
);
502 g_assert_not_reached();
505 if (fesetround(rhost
)) {
506 die_host_rounding(rounding
);
510 static void set_soft_precision(enum rounding rounding
)
516 mode
= float_round_nearest_even
;
519 mode
= float_round_to_zero
;
522 mode
= float_round_down
;
525 mode
= float_round_up
;
528 mode
= float_round_ties_away
;
531 g_assert_not_reached();
533 soft_status
.float_rounding_mode
= mode
;
536 static void parse_args(int argc
, char *argv
[])
540 int rounding
= ROUND_EVEN
;
543 c
= getopt(argc
, argv
, "d:ho:p:r:t:zZ");
549 duration
= atoi(optarg
);
552 usage_complete(argc
, argv
);
555 val
= find_name(op_names
, optarg
);
557 fprintf(stderr
, "Unsupported op '%s'\n", optarg
);
563 if (!strcmp(optarg
, "single")) {
564 precision
= PREC_SINGLE
;
565 } else if (!strcmp(optarg
, "double")) {
566 precision
= PREC_DOUBLE
;
568 fprintf(stderr
, "Unsupported precision '%s'\n", optarg
);
573 rounding
= round_name_to_mode(optarg
);
575 fprintf(stderr
, "fatal: invalid rounding mode '%s'\n", optarg
);
580 val
= find_name(tester_names
, optarg
);
582 fprintf(stderr
, "Unsupported tester '%s'\n", optarg
);
588 soft_status
.flush_inputs_to_zero
= 1;
591 soft_status
.flush_to_zero
= 1;
596 /* set precision and rounding mode based on the tester */
599 set_host_precision(rounding
);
602 set_soft_precision(rounding
);
605 precision
= PREC_FLOAT32
;
608 precision
= PREC_FLOAT64
;
611 g_assert_not_reached();
615 g_assert_not_reached();
619 static void pr_stats(void)
621 printf("%.2f MFlops\n", (double)n_completed_ops
/ ns_elapsed
* 1e3
);
624 int main(int argc
, char *argv
[])
626 parse_args(argc
, argv
);