xref: /openbmc/qemu/tests/fp/fp-bench.c (revision 31cf4b97)
1 /*
2  * fp-bench.c - A collection of simple floating point microbenchmarks.
3  *
4  * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
5  *
6  * License: GNU GPL, version 2 or later.
7  *   See the COPYING file in the top-level directory.
8  */
9 #ifndef HW_POISON_H
10 #error Must define HW_POISON_H to work around TARGET_* poisoning
11 #endif
12 
13 #include "qemu/osdep.h"
14 #include <math.h>
15 #include <fenv.h>
16 #include "qemu/timer.h"
17 #include "fpu/softfloat.h"
18 
/*
 * Number of operations timed for each freshly generated set of operands;
 * this amortizes the computation of random inputs.
 */
#define OPS_PER_ITER     50000

/* Size of the operand arrays; mulAdd is instantiated with 3 operands. */
#define MAX_OPERANDS 3

/* Initial values of the per-operand pseudo-random sequences (random_ops[]). */
#define SEED_A 0xdeadfacedeadface
#define SEED_B 0xbadc0feebadc0fee
#define SEED_C 0xbeefdeadbeefdead
27 
/*
 * Floating point operations that can be benchmarked.
 * OP_MAX_NR is the number of operations, not an operation itself.
 */
enum op {
    OP_ADD,
    OP_SUB,
    OP_MUL,
    OP_DIV,
    OP_FMA,
    OP_SQRT,
    OP_CMP,
    OP_MAX_NR,
};

/*
 * Command-line names of the operations, indexed by enum op.
 * The table is NULL-terminated so that it can be passed to find_name()
 * and g_strjoinv().
 */
static const char * const op_names[] = {
    [OP_ADD] = "add",
    [OP_SUB] = "sub",
    [OP_MUL] = "mul",
    [OP_DIV] = "div",
    [OP_FMA] = "mulAdd",
    [OP_SQRT] = "sqrt",
    [OP_CMP] = "cmp",
    [OP_MAX_NR] = NULL,
};
49 
/*
 * Operand representation used by a benchmark.
 * PREC_SINGLE/PREC_DOUBLE operate on the host's float/double types;
 * PREC_FLOAT32/PREC_FLOAT64 operate on softfloat's float32/float64.
 * PREC_MAX_NR is the number of entries.
 */
enum precision {
    PREC_SINGLE,
    PREC_DOUBLE,
    PREC_FLOAT32,
    PREC_FLOAT64,
    PREC_MAX_NR,
};
57 
/*
 * Rounding modes selectable with -r.
 * N_ROUND_MODES is the number of modes, not a mode itself.
 */
enum rounding {
    ROUND_EVEN,
    ROUND_ZERO,
    ROUND_DOWN,
    ROUND_UP,
    ROUND_TIEAWAY,
    N_ROUND_MODES,
};

/*
 * Command-line names of the rounding modes, indexed by enum rounding.
 * Unlike op_names[] and tester_names[], this table is NOT NULL-terminated;
 * iterate it with N_ROUND_MODES as the bound.
 */
static const char * const round_names[] = {
    [ROUND_EVEN] = "even",
    [ROUND_ZERO] = "zero",
    [ROUND_DOWN] = "down",
    [ROUND_UP] = "up",
    [ROUND_TIEAWAY] = "tieaway",
};
74 
/*
 * Implementation under test: softfloat (TESTER_SOFT) or the host's
 * floating point support (TESTER_HOST). TESTER_MAX_NR is the entry count.
 */
enum tester {
    TESTER_SOFT,
    TESTER_HOST,
    TESTER_MAX_NR,
};

/*
 * Command-line names of the testers, indexed by enum tester.
 * NULL-terminated for use with find_name() and g_strjoinv().
 */
static const char * const tester_names[] = {
    [TESTER_SOFT] = "soft",
    [TESTER_HOST] = "host",
    [TESTER_MAX_NR] = NULL,
};
86 
/*
 * A single operand or result, viewable as a host float/double, as a
 * softfloat float32/float64, or as a raw 64-bit integer.
 */
union fp {
    float f;
    double d;
    float32 f32;
    float64 f64;
    uint64_t u64;
};
94 
/* Forward declaration; no definition of struct op_state appears in this file. */
struct op_state;

/*
 * Function types taking an op_state and returning a host float/double.
 * NOTE(review): neither these typedefs nor union fp_func are referenced
 * by the rest of this file - confirm whether they are still needed.
 */
typedef float (*float_func_t)(const struct op_state *s);
typedef double (*double_func_t)(const struct op_state *s);

union fp_func {
    float_func_t float_func;
    double_func_t double_func;
};

/* Type of the generated per-operation, per-precision benchmark entry points. */
typedef void (*bench_func_t)(void);

/* NOTE(review): struct op_desc is not referenced by the rest of this file. */
struct op_desc {
    const char * const name;
};
110 
/* Benchmark duration used when -d is not given. */
#define DEFAULT_DURATION_SECS 1

/* Current state of the pseudo-random sequence feeding each operand. */
static uint64_t random_ops[MAX_OPERANDS] = {
    SEED_A, SEED_B, SEED_C,
};
/* softfloat status (rounding mode, flush-to-zero flags) used by the soft tester */
static float_status soft_status;
/* Selections made by parse_args(); zero-initialized to the first enumerator. */
static enum precision precision;
static enum op operation;
static enum tester tester;
/* Number of operations executed so far, accumulated by bench(). */
static uint64_t n_completed_ops;
/* Requested benchmark duration, in seconds. */
static unsigned int duration = DEFAULT_DURATION_SECS;
/* Time spent inside the timed loops of bench(), in nanoseconds. */
static int64_t ns_elapsed;
/* disable optimizations with volatile */
static volatile union fp res;
125 
/*
 * One step of the xorshift64* generator described at
 * https://en.wikipedia.org/wiki/Xorshift
 *
 * Chosen over rand_r() for speed and because it produces a full 64-bit
 * value per call, whereas rand_r() is limited to the range [0, RAND_MAX].
 *
 * @x: the previous value of the sequence
 *
 * Returns the next value; callers pass it back in to continue the sequence.
 * An input of 0 yields 0.
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t s = x;

    s = s ^ (s >> 12); /* a */
    s = s ^ (s << 25); /* b */
    s = s ^ (s >> 27); /* c */

    return s * multiplier;
}
138 
139 static void update_random_ops(int n_ops, enum precision prec)
140 {
141     int i;
142 
143     for (i = 0; i < n_ops; i++) {
144         uint64_t r = random_ops[i];
145 
146         if (prec == PREC_SINGLE || PREC_FLOAT32) {
147             do {
148                 r = xorshift64star(r);
149             } while (!float32_is_normal(r));
150         } else if (prec == PREC_DOUBLE || PREC_FLOAT64) {
151             do {
152                 r = xorshift64star(r);
153             } while (!float64_is_normal(r));
154         } else {
155             g_assert_not_reached();
156         }
157         random_ops[i] = r;
158     }
159 }
160 
161 static void fill_random(union fp *ops, int n_ops, enum precision prec,
162                         bool no_neg)
163 {
164     int i;
165 
166     for (i = 0; i < n_ops; i++) {
167         switch (prec) {
168         case PREC_SINGLE:
169         case PREC_FLOAT32:
170             ops[i].f32 = make_float32(random_ops[i]);
171             if (no_neg && float32_is_neg(ops[i].f32)) {
172                 ops[i].f32 = float32_chs(ops[i].f32);
173             }
174             /* raise the exponent to limit the frequency of denormal results */
175             ops[i].f32 |= 0x40000000;
176             break;
177         case PREC_DOUBLE:
178         case PREC_FLOAT64:
179             ops[i].f64 = make_float64(random_ops[i]);
180             if (no_neg && float64_is_neg(ops[i].f64)) {
181                 ops[i].f64 = float64_chs(ops[i].f64);
182             }
183             /* raise the exponent to limit the frequency of denormal results */
184             ops[i].f64 |= LIT64(0x4000000000000000);
185             break;
186         default:
187             g_assert_not_reached();
188         }
189     }
190 }
191 
192 /*
193  * The main benchmark function. Instead of (ab)using macros, we rely
194  * on the compiler to unfold this at compile-time.
195  */
196 static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
197 {
198     int64_t tf = get_clock() + duration * 1000000000LL;
199 
200     while (get_clock() < tf) {
201         union fp ops[MAX_OPERANDS];
202         int64_t t0;
203         int i;
204 
205         update_random_ops(n_ops, prec);
206         switch (prec) {
207         case PREC_SINGLE:
208             fill_random(ops, n_ops, prec, no_neg);
209             t0 = get_clock();
210             for (i = 0; i < OPS_PER_ITER; i++) {
211                 float a = ops[0].f;
212                 float b = ops[1].f;
213                 float c = ops[2].f;
214 
215                 switch (op) {
216                 case OP_ADD:
217                     res.f = a + b;
218                     break;
219                 case OP_SUB:
220                     res.f = a - b;
221                     break;
222                 case OP_MUL:
223                     res.f = a * b;
224                     break;
225                 case OP_DIV:
226                     res.f = a / b;
227                     break;
228                 case OP_FMA:
229                     res.f = fmaf(a, b, c);
230                     break;
231                 case OP_SQRT:
232                     res.f = sqrtf(a);
233                     break;
234                 case OP_CMP:
235                     res.u64 = isgreater(a, b);
236                     break;
237                 default:
238                     g_assert_not_reached();
239                 }
240             }
241             break;
242         case PREC_DOUBLE:
243             fill_random(ops, n_ops, prec, no_neg);
244             t0 = get_clock();
245             for (i = 0; i < OPS_PER_ITER; i++) {
246                 double a = ops[0].d;
247                 double b = ops[1].d;
248                 double c = ops[2].d;
249 
250                 switch (op) {
251                 case OP_ADD:
252                     res.d = a + b;
253                     break;
254                 case OP_SUB:
255                     res.d = a - b;
256                     break;
257                 case OP_MUL:
258                     res.d = a * b;
259                     break;
260                 case OP_DIV:
261                     res.d = a / b;
262                     break;
263                 case OP_FMA:
264                     res.d = fma(a, b, c);
265                     break;
266                 case OP_SQRT:
267                     res.d = sqrt(a);
268                     break;
269                 case OP_CMP:
270                     res.u64 = isgreater(a, b);
271                     break;
272                 default:
273                     g_assert_not_reached();
274                 }
275             }
276             break;
277         case PREC_FLOAT32:
278             fill_random(ops, n_ops, prec, no_neg);
279             t0 = get_clock();
280             for (i = 0; i < OPS_PER_ITER; i++) {
281                 float32 a = ops[0].f32;
282                 float32 b = ops[1].f32;
283                 float32 c = ops[2].f32;
284 
285                 switch (op) {
286                 case OP_ADD:
287                     res.f32 = float32_add(a, b, &soft_status);
288                     break;
289                 case OP_SUB:
290                     res.f32 = float32_sub(a, b, &soft_status);
291                     break;
292                 case OP_MUL:
293                     res.f = float32_mul(a, b, &soft_status);
294                     break;
295                 case OP_DIV:
296                     res.f32 = float32_div(a, b, &soft_status);
297                     break;
298                 case OP_FMA:
299                     res.f32 = float32_muladd(a, b, c, 0, &soft_status);
300                     break;
301                 case OP_SQRT:
302                     res.f32 = float32_sqrt(a, &soft_status);
303                     break;
304                 case OP_CMP:
305                     res.u64 = float32_compare_quiet(a, b, &soft_status);
306                     break;
307                 default:
308                     g_assert_not_reached();
309                 }
310             }
311             break;
312         case PREC_FLOAT64:
313             fill_random(ops, n_ops, prec, no_neg);
314             t0 = get_clock();
315             for (i = 0; i < OPS_PER_ITER; i++) {
316                 float64 a = ops[0].f64;
317                 float64 b = ops[1].f64;
318                 float64 c = ops[2].f64;
319 
320                 switch (op) {
321                 case OP_ADD:
322                     res.f64 = float64_add(a, b, &soft_status);
323                     break;
324                 case OP_SUB:
325                     res.f64 = float64_sub(a, b, &soft_status);
326                     break;
327                 case OP_MUL:
328                     res.f = float64_mul(a, b, &soft_status);
329                     break;
330                 case OP_DIV:
331                     res.f64 = float64_div(a, b, &soft_status);
332                     break;
333                 case OP_FMA:
334                     res.f64 = float64_muladd(a, b, c, 0, &soft_status);
335                     break;
336                 case OP_SQRT:
337                     res.f64 = float64_sqrt(a, &soft_status);
338                     break;
339                 case OP_CMP:
340                     res.u64 = float64_compare_quiet(a, b, &soft_status);
341                     break;
342                 default:
343                     g_assert_not_reached();
344                 }
345             }
346             break;
347         default:
348             g_assert_not_reached();
349         }
350         ns_elapsed += get_clock() - t0;
351         n_completed_ops += OPS_PER_ITER;
352     }
353 }
354 
/*
 * Define a benchmark entry point 'name' that calls bench() with constant
 * arguments. The flatten attribute asks the compiler to inline the calls
 * made by the generated function. The 'type' argument is not used by the
 * expansion.
 */
#define GEN_BENCH(name, type, prec, op, n_ops)          \
    static void __attribute__((flatten)) name(void)     \
    {                                                   \
        bench(prec, op, n_ops, false);                  \
    }

/* Same as GEN_BENCH, but the operands are restricted to non-negative values. */
#define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops)   \
    static void __attribute__((flatten)) name(void)     \
    {                                                   \
        bench(prec, op, n_ops, true);                   \
    }

/*
 * Generate bench_<opname>_{float,double,float32,float64}, one entry point
 * per precision, for an operation taking n_ops operands.
 */
#define GEN_BENCH_ALL_TYPES(opname, op, n_ops)                          \
    GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)

GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
#undef GEN_BENCH_ALL_TYPES

/* As GEN_BENCH_ALL_TYPES, for operations benchmarked on non-negative inputs. */
#define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n)                         \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)

/* sqrt is the only operation generated with non-negative operands */
GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
#undef GEN_BENCH_ALL_TYPES_NO_NEG

#undef GEN_BENCH_NO_NEG
#undef GEN_BENCH
392 
/*
 * Expand to the bench_funcs[] row for operation 'op', pointing each
 * precision at the matching generated bench_<opname>_<type> function.
 */
#define GEN_BENCH_FUNCS(opname, op)                             \
    [op] = {                                                    \
        [PREC_SINGLE]    = bench_ ## opname ## _float,          \
        [PREC_DOUBLE]    = bench_ ## opname ## _double,         \
        [PREC_FLOAT32]   = bench_ ## opname ## _float32,        \
        [PREC_FLOAT64]   = bench_ ## opname ## _float64,        \
    }

/* Benchmark entry points, indexed by [operation][precision]. */
static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
    GEN_BENCH_FUNCS(add, OP_ADD),
    GEN_BENCH_FUNCS(sub, OP_SUB),
    GEN_BENCH_FUNCS(mul, OP_MUL),
    GEN_BENCH_FUNCS(div, OP_DIV),
    GEN_BENCH_FUNCS(fma, OP_FMA),
    GEN_BENCH_FUNCS(sqrt, OP_SQRT),
    GEN_BENCH_FUNCS(cmp, OP_CMP),
};

#undef GEN_BENCH_FUNCS
412 
413 static void run_bench(void)
414 {
415     bench_func_t f;
416 
417     f = bench_funcs[operation][precision];
418     g_assert(f);
419     f();
420 }
421 
/*
 * Search the string table @arr for an exact match of @name.
 *
 * @arr must be NULL-terminated.
 *
 * Returns the index of the first matching entry, or -1 if there is none.
 */
static int find_name(const char * const *arr, const char *name)
{
    const char * const *entry;

    for (entry = arr; *entry; entry++) {
        if (!strcmp(name, *entry)) {
            return (int)(entry - arr);
        }
    }
    return -1;
}
434 
435 static void usage_complete(int argc, char *argv[])
436 {
437     gchar *op_list = g_strjoinv(", ", (gchar **)op_names);
438     gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names);
439 
440     fprintf(stderr, "Usage: %s [options]\n", argv[0]);
441     fprintf(stderr, "options:\n");
442     fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
443             DEFAULT_DURATION_SECS);
444     fprintf(stderr, " -h = show this help message.\n");
445     fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
446             op_list, op_names[0]);
447     fprintf(stderr, " -p = floating point precision (single, double). "
448             "Default: single\n");
449     fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
450             "Default: even\n");
451     fprintf(stderr, " -t = tester (%s). Default: %s\n",
452             tester_list, tester_names[0]);
453     fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
454             "Default: disabled\n");
455     fprintf(stderr, " -Z = flush output to zero (soft tester only). "
456             "Default: disabled\n");
457 
458     g_free(tester_list);
459     g_free(op_list);
460 }
461 
462 static int round_name_to_mode(const char *name)
463 {
464     int i;
465 
466     for (i = 0; i < N_ROUND_MODES; i++) {
467         if (!strcmp(round_names[i], name)) {
468             return i;
469         }
470     }
471     return -1;
472 }
473 
474 static void QEMU_NORETURN die_host_rounding(enum rounding rounding)
475 {
476     fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
477             round_names[rounding]);
478     exit(EXIT_FAILURE);
479 }
480 
481 static void set_host_precision(enum rounding rounding)
482 {
483     int rhost;
484 
485     switch (rounding) {
486     case ROUND_EVEN:
487         rhost = FE_TONEAREST;
488         break;
489     case ROUND_ZERO:
490         rhost = FE_TOWARDZERO;
491         break;
492     case ROUND_DOWN:
493         rhost = FE_DOWNWARD;
494         break;
495     case ROUND_UP:
496         rhost = FE_UPWARD;
497         break;
498     case ROUND_TIEAWAY:
499         die_host_rounding(rounding);
500         return;
501     default:
502         g_assert_not_reached();
503     }
504 
505     if (fesetround(rhost)) {
506         die_host_rounding(rounding);
507     }
508 }
509 
510 static void set_soft_precision(enum rounding rounding)
511 {
512     signed char mode;
513 
514     switch (rounding) {
515     case ROUND_EVEN:
516         mode = float_round_nearest_even;
517         break;
518     case ROUND_ZERO:
519         mode = float_round_to_zero;
520         break;
521     case ROUND_DOWN:
522         mode = float_round_down;
523         break;
524     case ROUND_UP:
525         mode = float_round_up;
526         break;
527     case ROUND_TIEAWAY:
528         mode = float_round_ties_away;
529         break;
530     default:
531         g_assert_not_reached();
532     }
533     soft_status.float_rounding_mode = mode;
534 }
535 
536 static void parse_args(int argc, char *argv[])
537 {
538     int c;
539     int val;
540     int rounding = ROUND_EVEN;
541 
542     for (;;) {
543         c = getopt(argc, argv, "d:ho:p:r:t:zZ");
544         if (c < 0) {
545             break;
546         }
547         switch (c) {
548         case 'd':
549             duration = atoi(optarg);
550             break;
551         case 'h':
552             usage_complete(argc, argv);
553             exit(EXIT_SUCCESS);
554         case 'o':
555             val = find_name(op_names, optarg);
556             if (val < 0) {
557                 fprintf(stderr, "Unsupported op '%s'\n", optarg);
558                 exit(EXIT_FAILURE);
559             }
560             operation = val;
561             break;
562         case 'p':
563             if (!strcmp(optarg, "single")) {
564                 precision = PREC_SINGLE;
565             } else if (!strcmp(optarg, "double")) {
566                 precision = PREC_DOUBLE;
567             } else {
568                 fprintf(stderr, "Unsupported precision '%s'\n", optarg);
569                 exit(EXIT_FAILURE);
570             }
571             break;
572         case 'r':
573             rounding = round_name_to_mode(optarg);
574             if (rounding < 0) {
575                 fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
576                 exit(EXIT_FAILURE);
577             }
578             break;
579         case 't':
580             val = find_name(tester_names, optarg);
581             if (val < 0) {
582                 fprintf(stderr, "Unsupported tester '%s'\n", optarg);
583                 exit(EXIT_FAILURE);
584             }
585             tester = val;
586             break;
587         case 'z':
588             soft_status.flush_inputs_to_zero = 1;
589             break;
590         case 'Z':
591             soft_status.flush_to_zero = 1;
592             break;
593         }
594     }
595 
596     /* set precision and rounding mode based on the tester */
597     switch (tester) {
598     case TESTER_HOST:
599         set_host_precision(rounding);
600         break;
601     case TESTER_SOFT:
602         set_soft_precision(rounding);
603         switch (precision) {
604         case PREC_SINGLE:
605             precision = PREC_FLOAT32;
606             break;
607         case PREC_DOUBLE:
608             precision = PREC_FLOAT64;
609             break;
610         default:
611             g_assert_not_reached();
612         }
613         break;
614     default:
615         g_assert_not_reached();
616     }
617 }
618 
619 static void pr_stats(void)
620 {
621     printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
622 }
623 
/* Entry point: configure from the command line, run, report. */
int main(int argc, char *argv[])
{
    /* exits on its own for -h and for invalid arguments */
    parse_args(argc, argv);

    run_bench();
    pr_stats();

    return 0;
}
631