xref: /openbmc/qemu/tests/fp/fp-bench.c (revision 7ea7b9ad)
1 /*
2  * fp-bench.c - A collection of simple floating point microbenchmarks.
3  *
4  * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
5  *
6  * License: GNU GPL, version 2 or later.
7  *   See the COPYING file in the top-level directory.
8  */
9 #ifndef HW_POISON_H
10 #error Must define HW_POISON_H to work around TARGET_* poisoning
11 #endif
12 
13 #include "qemu/osdep.h"
14 #include <math.h>
15 #include <fenv.h>
16 #include "qemu/timer.h"
17 #include "fpu/softfloat.h"
18 
/*
 * Number of operations timed back-to-back on one set of operands;
 * this amortizes the computation of random inputs.
 */
#define OPS_PER_ITER     50000

/* Size of the operand arrays; OP_FMA is generated with three operands. */
#define MAX_OPERANDS 3

/* Initial values of random_ops[], one per operand slot. */
#define SEED_A 0xdeadfacedeadface
#define SEED_B 0xbadc0feebadc0fee
#define SEED_C 0xbeefdeadbeefdead
27 
/*
 * Benchmarkable operations. The values are used as the first index of
 * bench_funcs[] and as indices into op_names[].
 */
enum op {
    OP_ADD,
    OP_SUB,
    OP_MUL,
    OP_DIV,
    OP_FMA,
    OP_SQRT,
    OP_CMP,
    OP_MAX_NR,
};

/*
 * Command-line name of each operation, indexed by enum op. The trailing
 * NULL entry terminates the array for find_name() and g_strjoinv().
 */
static const char * const op_names[] = {
    [OP_ADD] = "add",
    [OP_SUB] = "sub",
    [OP_MUL] = "mul",
    [OP_DIV] = "div",
    [OP_FMA] = "mulAdd",
    [OP_SQRT] = "sqrt",
    [OP_CMP] = "cmp",
    [OP_MAX_NR] = NULL,
};
49 
/*
 * Operand representations, used as the second index of bench_funcs[].
 * In bench(), PREC_SINGLE and PREC_DOUBLE use the host's float and double
 * arithmetic, while PREC_FLOAT32 and PREC_FLOAT64 call the softfloat
 * float32 and float64 routines.
 */
enum precision {
    PREC_SINGLE,
    PREC_DOUBLE,
    PREC_FLOAT32,
    PREC_FLOAT64,
    PREC_MAX_NR,
};
57 
/* Rounding modes selectable with -r; N_ROUND_MODES counts them. */
enum rounding {
    ROUND_EVEN,
    ROUND_ZERO,
    ROUND_DOWN,
    ROUND_UP,
    ROUND_TIEAWAY,
    N_ROUND_MODES,
};

/*
 * Command-line name of each rounding mode, indexed by enum rounding.
 * Unlike op_names[] and tester_names[] this array has no NULL terminator;
 * round_name_to_mode() bounds its scan with N_ROUND_MODES instead.
 */
static const char * const round_names[] = {
    [ROUND_EVEN] = "even",
    [ROUND_ZERO] = "zero",
    [ROUND_DOWN] = "down",
    [ROUND_UP] = "up",
    [ROUND_TIEAWAY] = "tieaway",
};
74 
/* Which implementation is benchmarked: softfloat or the host FPU. */
enum tester {
    TESTER_SOFT,
    TESTER_HOST,
    TESTER_MAX_NR,
};

/*
 * Command-line name of each tester, indexed by enum tester. The trailing
 * NULL entry terminates the array for find_name() and g_strjoinv().
 */
static const char * const tester_names[] = {
    [TESTER_SOFT] = "soft",
    [TESTER_HOST] = "host",
    [TESTER_MAX_NR] = NULL,
};
86 
/*
 * Overlay of the operand/result representations: host float and double,
 * softfloat float32 and float64, and a raw 64-bit integer view.
 */
union fp {
    float f;
    double d;
    float32 f32;
    float64 f64;
    uint64_t u64;
};

/* Only forward-declared; no definition is visible in this file. */
struct op_state;

/*
 * NOTE(review): float_func_t, double_func_t and union fp_func do not
 * appear to be referenced anywhere else in this file.
 */
typedef float (*float_func_t)(const struct op_state *s);
typedef double (*double_func_t)(const struct op_state *s);

union fp_func {
    float_func_t float_func;
    double_func_t double_func;
};

/* Signature of the generated bench_<op>_<type>() entry points. */
typedef void (*bench_func_t)(void);

/* NOTE(review): struct op_desc also looks unused in this file. */
struct op_desc {
    const char * const name;
};
110 
/* Benchmark run time, in seconds, used when -d is not given. */
#define DEFAULT_DURATION_SECS 1

/* Per-operand generator state, advanced by update_random_ops(). */
static uint64_t random_ops[MAX_OPERANDS] = {
    SEED_A, SEED_B, SEED_C,
};
/* Status block handed to every softfloat call; configured by parse_args(). */
static float_status soft_status;
/*
 * Selections made by parse_args(). Being zero-initialized statics, they
 * start out as PREC_SINGLE, OP_ADD and TESTER_SOFT respectively.
 */
static enum precision precision;
static enum op operation;
static enum tester tester;
/* Running totals accumulated by bench() and reported by pr_stats(). */
static uint64_t n_completed_ops;
static unsigned int duration = DEFAULT_DURATION_SECS;
static int64_t ns_elapsed;
/*
 * Sink for every benchmark result; volatile so that the compiler cannot
 * optimize the benchmarked operations away.
 */
static volatile union fp res;
125 
/*
 * One step of the xorshift64* generator described at
 * https://en.wikipedia.org/wiki/Xorshift
 *
 * Used instead of rand_r() because it is faster and produces values over
 * the whole 64-bit range rather than being bounded by RAND_MAX.
 *
 * @x: current generator state. Note that a state of 0 maps to 0.
 *
 * Returns the scrambled state multiplied by the generator's constant.
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t state = x;

    state = state ^ (state >> 12);
    state = state ^ (state << 25);
    state = state ^ (state >> 27);

    return state * multiplier;
}
138 
139 static void update_random_ops(int n_ops, enum precision prec)
140 {
141     int i;
142 
143     for (i = 0; i < n_ops; i++) {
144         uint64_t r = random_ops[i];
145 
146         switch (prec) {
147         case PREC_SINGLE:
148         case PREC_FLOAT32:
149             do {
150                 r = xorshift64star(r);
151             } while (!float32_is_normal(r));
152             break;
153         case PREC_DOUBLE:
154         case PREC_FLOAT64:
155             do {
156                 r = xorshift64star(r);
157             } while (!float64_is_normal(r));
158             break;
159         default:
160             g_assert_not_reached();
161         }
162         random_ops[i] = r;
163     }
164 }
165 
166 static void fill_random(union fp *ops, int n_ops, enum precision prec,
167                         bool no_neg)
168 {
169     int i;
170 
171     for (i = 0; i < n_ops; i++) {
172         switch (prec) {
173         case PREC_SINGLE:
174         case PREC_FLOAT32:
175             ops[i].f32 = make_float32(random_ops[i]);
176             if (no_neg && float32_is_neg(ops[i].f32)) {
177                 ops[i].f32 = float32_chs(ops[i].f32);
178             }
179             break;
180         case PREC_DOUBLE:
181         case PREC_FLOAT64:
182             ops[i].f64 = make_float64(random_ops[i]);
183             if (no_neg && float64_is_neg(ops[i].f64)) {
184                 ops[i].f64 = float64_chs(ops[i].f64);
185             }
186             break;
187         default:
188             g_assert_not_reached();
189         }
190     }
191 }
192 
193 /*
194  * The main benchmark function. Instead of (ab)using macros, we rely
195  * on the compiler to unfold this at compile-time.
196  */
197 static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
198 {
199     int64_t tf = get_clock() + duration * 1000000000LL;
200 
201     while (get_clock() < tf) {
202         union fp ops[MAX_OPERANDS];
203         int64_t t0;
204         int i;
205 
206         update_random_ops(n_ops, prec);
207         switch (prec) {
208         case PREC_SINGLE:
209             fill_random(ops, n_ops, prec, no_neg);
210             t0 = get_clock();
211             for (i = 0; i < OPS_PER_ITER; i++) {
212                 float a = ops[0].f;
213                 float b = ops[1].f;
214                 float c = ops[2].f;
215 
216                 switch (op) {
217                 case OP_ADD:
218                     res.f = a + b;
219                     break;
220                 case OP_SUB:
221                     res.f = a - b;
222                     break;
223                 case OP_MUL:
224                     res.f = a * b;
225                     break;
226                 case OP_DIV:
227                     res.f = a / b;
228                     break;
229                 case OP_FMA:
230                     res.f = fmaf(a, b, c);
231                     break;
232                 case OP_SQRT:
233                     res.f = sqrtf(a);
234                     break;
235                 case OP_CMP:
236                     res.u64 = isgreater(a, b);
237                     break;
238                 default:
239                     g_assert_not_reached();
240                 }
241             }
242             break;
243         case PREC_DOUBLE:
244             fill_random(ops, n_ops, prec, no_neg);
245             t0 = get_clock();
246             for (i = 0; i < OPS_PER_ITER; i++) {
247                 double a = ops[0].d;
248                 double b = ops[1].d;
249                 double c = ops[2].d;
250 
251                 switch (op) {
252                 case OP_ADD:
253                     res.d = a + b;
254                     break;
255                 case OP_SUB:
256                     res.d = a - b;
257                     break;
258                 case OP_MUL:
259                     res.d = a * b;
260                     break;
261                 case OP_DIV:
262                     res.d = a / b;
263                     break;
264                 case OP_FMA:
265                     res.d = fma(a, b, c);
266                     break;
267                 case OP_SQRT:
268                     res.d = sqrt(a);
269                     break;
270                 case OP_CMP:
271                     res.u64 = isgreater(a, b);
272                     break;
273                 default:
274                     g_assert_not_reached();
275                 }
276             }
277             break;
278         case PREC_FLOAT32:
279             fill_random(ops, n_ops, prec, no_neg);
280             t0 = get_clock();
281             for (i = 0; i < OPS_PER_ITER; i++) {
282                 float32 a = ops[0].f32;
283                 float32 b = ops[1].f32;
284                 float32 c = ops[2].f32;
285 
286                 switch (op) {
287                 case OP_ADD:
288                     res.f32 = float32_add(a, b, &soft_status);
289                     break;
290                 case OP_SUB:
291                     res.f32 = float32_sub(a, b, &soft_status);
292                     break;
293                 case OP_MUL:
294                     res.f = float32_mul(a, b, &soft_status);
295                     break;
296                 case OP_DIV:
297                     res.f32 = float32_div(a, b, &soft_status);
298                     break;
299                 case OP_FMA:
300                     res.f32 = float32_muladd(a, b, c, 0, &soft_status);
301                     break;
302                 case OP_SQRT:
303                     res.f32 = float32_sqrt(a, &soft_status);
304                     break;
305                 case OP_CMP:
306                     res.u64 = float32_compare_quiet(a, b, &soft_status);
307                     break;
308                 default:
309                     g_assert_not_reached();
310                 }
311             }
312             break;
313         case PREC_FLOAT64:
314             fill_random(ops, n_ops, prec, no_neg);
315             t0 = get_clock();
316             for (i = 0; i < OPS_PER_ITER; i++) {
317                 float64 a = ops[0].f64;
318                 float64 b = ops[1].f64;
319                 float64 c = ops[2].f64;
320 
321                 switch (op) {
322                 case OP_ADD:
323                     res.f64 = float64_add(a, b, &soft_status);
324                     break;
325                 case OP_SUB:
326                     res.f64 = float64_sub(a, b, &soft_status);
327                     break;
328                 case OP_MUL:
329                     res.f = float64_mul(a, b, &soft_status);
330                     break;
331                 case OP_DIV:
332                     res.f64 = float64_div(a, b, &soft_status);
333                     break;
334                 case OP_FMA:
335                     res.f64 = float64_muladd(a, b, c, 0, &soft_status);
336                     break;
337                 case OP_SQRT:
338                     res.f64 = float64_sqrt(a, &soft_status);
339                     break;
340                 case OP_CMP:
341                     res.u64 = float64_compare_quiet(a, b, &soft_status);
342                     break;
343                 default:
344                     g_assert_not_reached();
345                 }
346             }
347             break;
348         default:
349             g_assert_not_reached();
350         }
351         ns_elapsed += get_clock() - t0;
352         n_completed_ops += OPS_PER_ITER;
353     }
354 }
355 
/*
 * Define a no-argument function @name that calls bench() with constant
 * arguments and no_neg == false. The @type argument is not used in the
 * expansion. The flatten attribute is presumably there so that bench() is
 * inlined and specialized for those constants.
 */
#define GEN_BENCH(name, type, prec, op, n_ops)          \
    static void __attribute__((flatten)) name(void)     \
    {                                                   \
        bench(prec, op, n_ops, false);                  \
    }

/* Same as GEN_BENCH, but calls bench() with no_neg == true. */
#define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops)   \
    static void __attribute__((flatten)) name(void)     \
    {                                                   \
        bench(prec, op, n_ops, true);                   \
    }

/*
 * Instantiate bench_<opname>_{float,double,float32,float64}, one per
 * enum precision value.
 */
#define GEN_BENCH_ALL_TYPES(opname, op, n_ops)                          \
    GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)

/* The last argument is the number of operands generated for the op. */
GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
#undef GEN_BENCH_ALL_TYPES

/* As GEN_BENCH_ALL_TYPES, but built on GEN_BENCH_NO_NEG. */
#define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n)                         \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)

/* sqrt is the only operation benchmarked with non-negative operands. */
GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
#undef GEN_BENCH_ALL_TYPES_NO_NEG

#undef GEN_BENCH_NO_NEG
#undef GEN_BENCH
393 
/*
 * Initializer for one row of bench_funcs[]: maps each precision to the
 * matching generated bench_<opname>_<type> function.
 */
#define GEN_BENCH_FUNCS(opname, op)                             \
    [op] = {                                                    \
        [PREC_SINGLE]    = bench_ ## opname ## _float,          \
        [PREC_DOUBLE]    = bench_ ## opname ## _double,         \
        [PREC_FLOAT32]   = bench_ ## opname ## _float32,        \
        [PREC_FLOAT64]   = bench_ ## opname ## _float64,        \
    }

/* Dispatch table used by run_bench(), indexed by [operation][precision]. */
static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
    GEN_BENCH_FUNCS(add, OP_ADD),
    GEN_BENCH_FUNCS(sub, OP_SUB),
    GEN_BENCH_FUNCS(mul, OP_MUL),
    GEN_BENCH_FUNCS(div, OP_DIV),
    GEN_BENCH_FUNCS(fma, OP_FMA),
    GEN_BENCH_FUNCS(sqrt, OP_SQRT),
    GEN_BENCH_FUNCS(cmp, OP_CMP),
};

#undef GEN_BENCH_FUNCS
413 
414 static void run_bench(void)
415 {
416     bench_func_t f;
417 
418     f = bench_funcs[operation][precision];
419     g_assert(f);
420     f();
421 }
422 
/*
 * Look up @name in @arr, which must be NULL-terminated.
 *
 * Returns the index of the first entry that compares equal to @name,
 * or -1 if no entry matches.
 */
static int find_name(const char * const *arr, const char *name)
{
    const char * const *entry;

    for (entry = arr; *entry; entry++) {
        if (!strcmp(name, *entry)) {
            return (int)(entry - arr);
        }
    }
    return -1;
}
435 
436 static void usage_complete(int argc, char *argv[])
437 {
438     gchar *op_list = g_strjoinv(", ", (gchar **)op_names);
439     gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names);
440 
441     fprintf(stderr, "Usage: %s [options]\n", argv[0]);
442     fprintf(stderr, "options:\n");
443     fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
444             DEFAULT_DURATION_SECS);
445     fprintf(stderr, " -h = show this help message.\n");
446     fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
447             op_list, op_names[0]);
448     fprintf(stderr, " -p = floating point precision (single, double). "
449             "Default: single\n");
450     fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
451             "Default: even\n");
452     fprintf(stderr, " -t = tester (%s). Default: %s\n",
453             tester_list, tester_names[0]);
454     fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
455             "Default: disabled\n");
456     fprintf(stderr, " -Z = flush output to zero (soft tester only). "
457             "Default: disabled\n");
458 
459     g_free(tester_list);
460     g_free(op_list);
461 }
462 
463 static int round_name_to_mode(const char *name)
464 {
465     int i;
466 
467     for (i = 0; i < N_ROUND_MODES; i++) {
468         if (!strcmp(round_names[i], name)) {
469             return i;
470         }
471     }
472     return -1;
473 }
474 
475 static void QEMU_NORETURN die_host_rounding(enum rounding rounding)
476 {
477     fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
478             round_names[rounding]);
479     exit(EXIT_FAILURE);
480 }
481 
482 static void set_host_precision(enum rounding rounding)
483 {
484     int rhost;
485 
486     switch (rounding) {
487     case ROUND_EVEN:
488         rhost = FE_TONEAREST;
489         break;
490     case ROUND_ZERO:
491         rhost = FE_TOWARDZERO;
492         break;
493     case ROUND_DOWN:
494         rhost = FE_DOWNWARD;
495         break;
496     case ROUND_UP:
497         rhost = FE_UPWARD;
498         break;
499     case ROUND_TIEAWAY:
500         die_host_rounding(rounding);
501         return;
502     default:
503         g_assert_not_reached();
504     }
505 
506     if (fesetround(rhost)) {
507         die_host_rounding(rounding);
508     }
509 }
510 
511 static void set_soft_precision(enum rounding rounding)
512 {
513     signed char mode;
514 
515     switch (rounding) {
516     case ROUND_EVEN:
517         mode = float_round_nearest_even;
518         break;
519     case ROUND_ZERO:
520         mode = float_round_to_zero;
521         break;
522     case ROUND_DOWN:
523         mode = float_round_down;
524         break;
525     case ROUND_UP:
526         mode = float_round_up;
527         break;
528     case ROUND_TIEAWAY:
529         mode = float_round_ties_away;
530         break;
531     default:
532         g_assert_not_reached();
533     }
534     soft_status.float_rounding_mode = mode;
535 }
536 
537 static void parse_args(int argc, char *argv[])
538 {
539     int c;
540     int val;
541     int rounding = ROUND_EVEN;
542 
543     for (;;) {
544         c = getopt(argc, argv, "d:ho:p:r:t:zZ");
545         if (c < 0) {
546             break;
547         }
548         switch (c) {
549         case 'd':
550             duration = atoi(optarg);
551             break;
552         case 'h':
553             usage_complete(argc, argv);
554             exit(EXIT_SUCCESS);
555         case 'o':
556             val = find_name(op_names, optarg);
557             if (val < 0) {
558                 fprintf(stderr, "Unsupported op '%s'\n", optarg);
559                 exit(EXIT_FAILURE);
560             }
561             operation = val;
562             break;
563         case 'p':
564             if (!strcmp(optarg, "single")) {
565                 precision = PREC_SINGLE;
566             } else if (!strcmp(optarg, "double")) {
567                 precision = PREC_DOUBLE;
568             } else {
569                 fprintf(stderr, "Unsupported precision '%s'\n", optarg);
570                 exit(EXIT_FAILURE);
571             }
572             break;
573         case 'r':
574             rounding = round_name_to_mode(optarg);
575             if (rounding < 0) {
576                 fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
577                 exit(EXIT_FAILURE);
578             }
579             break;
580         case 't':
581             val = find_name(tester_names, optarg);
582             if (val < 0) {
583                 fprintf(stderr, "Unsupported tester '%s'\n", optarg);
584                 exit(EXIT_FAILURE);
585             }
586             tester = val;
587             break;
588         case 'z':
589             soft_status.flush_inputs_to_zero = 1;
590             break;
591         case 'Z':
592             soft_status.flush_to_zero = 1;
593             break;
594         }
595     }
596 
597     /* set precision and rounding mode based on the tester */
598     switch (tester) {
599     case TESTER_HOST:
600         set_host_precision(rounding);
601         break;
602     case TESTER_SOFT:
603         set_soft_precision(rounding);
604         switch (precision) {
605         case PREC_SINGLE:
606             precision = PREC_FLOAT32;
607             break;
608         case PREC_DOUBLE:
609             precision = PREC_FLOAT64;
610             break;
611         default:
612             g_assert_not_reached();
613         }
614         break;
615     default:
616         g_assert_not_reached();
617     }
618 }
619 
620 static void pr_stats(void)
621 {
622     printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
623 }
624 
/*
 * Entry point: configure the run from the command line, execute the
 * selected benchmark, then report its throughput.
 */
int main(int argc, char *argv[])
{
    parse_args(argc, argv);

    run_bench();
    pr_stats();

    return EXIT_SUCCESS;
}
632