1*6dc29354SIlya Leoshkevich /*
2*6dc29354SIlya Leoshkevich * Test floating-point multiply-and-add instructions.
3*6dc29354SIlya Leoshkevich *
4*6dc29354SIlya Leoshkevich * SPDX-License-Identifier: GPL-2.0-or-later
5*6dc29354SIlya Leoshkevich */
6*6dc29354SIlya Leoshkevich #include <fenv.h>
7*6dc29354SIlya Leoshkevich #include <stdbool.h>
8*6dc29354SIlya Leoshkevich #include <stdio.h>
9*6dc29354SIlya Leoshkevich #include <stdlib.h>
10*6dc29354SIlya Leoshkevich #include <string.h>
11*6dc29354SIlya Leoshkevich #include "float.h"
12*6dc29354SIlya Leoshkevich
/*
 * One floating-point operand or result, viewable either as a typed value
 * or as raw bytes.  Which typed member is meaningful depends on the format
 * index used by the test: 0 selects e, 1 selects d, 2 selects x.
 */
union val {
    float e;        /* format 0 (used with MAEBR) */
    double d;       /* format 1 (used with MADBR) */
    long double x;  /* format 2 (used with WFMAXB) */
    char buf[16];   /* raw byte view, handed to snan_to_qnan() */
};
19*6dc29354SIlya Leoshkevich
20*6dc29354SIlya Leoshkevich /*
21*6dc29354SIlya Leoshkevich * PoP tables as close to the original as possible.
22*6dc29354SIlya Leoshkevich */
23*6dc29354SIlya Leoshkevich static const char *table1[N_SIGNED_CLASSES][N_SIGNED_CLASSES] = {
24*6dc29354SIlya Leoshkevich /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */
25*6dc29354SIlya Leoshkevich {/* -inf */ "P(+inf)", "P(+inf)", "Xi: T(dNaN)", "Xi: T(dNaN)", "P(-inf)", "P(-inf)", "P(b)", "Xi: T(b*)"},
26*6dc29354SIlya Leoshkevich {/* -Fn */ "P(+inf)", "P(a*b)", "P(+0)", "P(-0)", "P(a*b)", "P(-inf)", "P(b)", "Xi: T(b*)"},
27*6dc29354SIlya Leoshkevich {/* -0 */ "Xi: T(dNaN)", "P(+0)", "P(+0)", "P(-0)", "P(-0)", "Xi: T(dNaN)", "P(b)", "Xi: T(b*)"},
28*6dc29354SIlya Leoshkevich {/* +0 */ "Xi: T(dNaN)", "P(-0)", "P(-0)", "P(+0)", "P(+0)", "Xi: T(dNaN)", "P(b)", "Xi: T(b*)"},
29*6dc29354SIlya Leoshkevich {/* +Fn */ "P(-inf)", "P(a*b)", "P(-0)", "P(+0)", "P(a*b)", "P(+inf)", "P(b)", "Xi: T(b*)"},
30*6dc29354SIlya Leoshkevich {/* +inf */ "P(-inf)", "P(-inf)", "Xi: T(dNaN)", "Xi: T(dNaN)", "P(+inf)", "P(+inf)", "P(b)", "Xi: T(b*)"},
31*6dc29354SIlya Leoshkevich {/* QNaN */ "P(a)", "P(a)", "P(a)", "P(a)", "P(a)", "P(a)", "P(a)", "Xi: T(b*)"},
32*6dc29354SIlya Leoshkevich {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"},
33*6dc29354SIlya Leoshkevich };
34*6dc29354SIlya Leoshkevich
35*6dc29354SIlya Leoshkevich static const char *table2[N_SIGNED_CLASSES][N_SIGNED_CLASSES] = {
36*6dc29354SIlya Leoshkevich /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */
37*6dc29354SIlya Leoshkevich {/* -inf */ "T(-inf)", "T(-inf)", "T(-inf)", "T(-inf)", "T(-inf)", "Xi: T(dNaN)", "T(c)", "Xi: T(c*)"},
38*6dc29354SIlya Leoshkevich {/* -Fn */ "T(-inf)", "R(p+c)", "R(p)", "R(p)", "R(p+c)", "T(+inf)", "T(c)", "Xi: T(c*)"},
39*6dc29354SIlya Leoshkevich {/* -0 */ "T(-inf)", "R(c)", "T(-0)", "Rezd", "R(c)", "T(+inf)", "T(c)", "Xi: T(c*)"},
40*6dc29354SIlya Leoshkevich {/* +0 */ "T(-inf)", "R(c)", "Rezd", "T(+0)", "R(c)", "T(+inf)", "T(c)", "Xi: T(c*)"},
41*6dc29354SIlya Leoshkevich {/* +Fn */ "T(-inf)", "R(p+c)", "R(p)", "R(p)", "R(p+c)", "T(+inf)", "T(c)", "Xi: T(c*)"},
42*6dc29354SIlya Leoshkevich {/* +inf */ "Xi: T(dNaN)", "T(+inf)", "T(+inf)", "T(+inf)", "T(+inf)", "T(+inf)", "T(c)", "Xi: T(c*)"},
43*6dc29354SIlya Leoshkevich {/* QNaN */ "T(p)", "T(p)", "T(p)", "T(p)", "T(p)", "T(p)", "T(p)", "Xi: T(c*)"},
44*6dc29354SIlya Leoshkevich /* SNaN: can't happen */
45*6dc29354SIlya Leoshkevich };
46*6dc29354SIlya Leoshkevich
interpret_tables(union val * r,bool * xi,int fmt,int cls_a,const union val * a,int cls_b,const union val * b,int cls_c,const union val * c)47*6dc29354SIlya Leoshkevich static void interpret_tables(union val *r, bool *xi, int fmt,
48*6dc29354SIlya Leoshkevich int cls_a, const union val *a,
49*6dc29354SIlya Leoshkevich int cls_b, const union val *b,
50*6dc29354SIlya Leoshkevich int cls_c, const union val *c)
51*6dc29354SIlya Leoshkevich {
52*6dc29354SIlya Leoshkevich const char *spec1 = table1[cls_a][cls_b];
53*6dc29354SIlya Leoshkevich const char *spec2;
54*6dc29354SIlya Leoshkevich union val p;
55*6dc29354SIlya Leoshkevich int cls_p;
56*6dc29354SIlya Leoshkevich
57*6dc29354SIlya Leoshkevich *xi = false;
58*6dc29354SIlya Leoshkevich
59*6dc29354SIlya Leoshkevich if (strcmp(spec1, "P(-inf)") == 0) {
60*6dc29354SIlya Leoshkevich cls_p = CLASS_MINUS_INF;
61*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(+inf)") == 0) {
62*6dc29354SIlya Leoshkevich cls_p = CLASS_PLUS_INF;
63*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(-0)") == 0) {
64*6dc29354SIlya Leoshkevich cls_p = CLASS_MINUS_ZERO;
65*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(+0)") == 0) {
66*6dc29354SIlya Leoshkevich cls_p = CLASS_PLUS_ZERO;
67*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(a)") == 0) {
68*6dc29354SIlya Leoshkevich cls_p = cls_a;
69*6dc29354SIlya Leoshkevich memcpy(&p, a, sizeof(p));
70*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(b)") == 0) {
71*6dc29354SIlya Leoshkevich cls_p = cls_b;
72*6dc29354SIlya Leoshkevich memcpy(&p, b, sizeof(p));
73*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(a*b)") == 0) {
74*6dc29354SIlya Leoshkevich /*
75*6dc29354SIlya Leoshkevich * In the general case splitting fma into multiplication and addition
76*6dc29354SIlya Leoshkevich * doesn't work, but this is the case with our test inputs.
77*6dc29354SIlya Leoshkevich */
78*6dc29354SIlya Leoshkevich cls_p = cls_a == cls_b ? CLASS_PLUS_FN : CLASS_MINUS_FN;
79*6dc29354SIlya Leoshkevich switch (fmt) {
80*6dc29354SIlya Leoshkevich case 0:
81*6dc29354SIlya Leoshkevich p.e = a->e * b->e;
82*6dc29354SIlya Leoshkevich break;
83*6dc29354SIlya Leoshkevich case 1:
84*6dc29354SIlya Leoshkevich p.d = a->d * b->d;
85*6dc29354SIlya Leoshkevich break;
86*6dc29354SIlya Leoshkevich case 2:
87*6dc29354SIlya Leoshkevich p.x = a->x * b->x;
88*6dc29354SIlya Leoshkevich break;
89*6dc29354SIlya Leoshkevich default:
90*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported fmt: %d\n", fmt);
91*6dc29354SIlya Leoshkevich exit(1);
92*6dc29354SIlya Leoshkevich }
93*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "Xi: T(dNaN)") == 0) {
94*6dc29354SIlya Leoshkevich memcpy(r, default_nans[fmt], sizeof(*r));
95*6dc29354SIlya Leoshkevich *xi = true;
96*6dc29354SIlya Leoshkevich return;
97*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "Xi: T(a*)") == 0) {
98*6dc29354SIlya Leoshkevich memcpy(r, a, sizeof(*r));
99*6dc29354SIlya Leoshkevich snan_to_qnan(r->buf, fmt);
100*6dc29354SIlya Leoshkevich *xi = true;
101*6dc29354SIlya Leoshkevich return;
102*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "Xi: T(b*)") == 0) {
103*6dc29354SIlya Leoshkevich memcpy(r, b, sizeof(*r));
104*6dc29354SIlya Leoshkevich snan_to_qnan(r->buf, fmt);
105*6dc29354SIlya Leoshkevich *xi = true;
106*6dc29354SIlya Leoshkevich return;
107*6dc29354SIlya Leoshkevich } else {
108*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported spec1: %s\n", spec1);
109*6dc29354SIlya Leoshkevich exit(1);
110*6dc29354SIlya Leoshkevich }
111*6dc29354SIlya Leoshkevich
112*6dc29354SIlya Leoshkevich spec2 = table2[cls_p][cls_c];
113*6dc29354SIlya Leoshkevich if (strcmp(spec2, "T(-inf)") == 0) {
114*6dc29354SIlya Leoshkevich memcpy(r, signed_floats[fmt][CLASS_MINUS_INF].v[0], sizeof(*r));
115*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "T(+inf)") == 0) {
116*6dc29354SIlya Leoshkevich memcpy(r, signed_floats[fmt][CLASS_PLUS_INF].v[0], sizeof(*r));
117*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "T(-0)") == 0) {
118*6dc29354SIlya Leoshkevich memcpy(r, signed_floats[fmt][CLASS_MINUS_ZERO].v[0], sizeof(*r));
119*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "T(+0)") == 0 || strcmp(spec2, "Rezd") == 0) {
120*6dc29354SIlya Leoshkevich memcpy(r, signed_floats[fmt][CLASS_PLUS_ZERO].v[0], sizeof(*r));
121*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "R(c)") == 0 || strcmp(spec2, "T(c)") == 0) {
122*6dc29354SIlya Leoshkevich memcpy(r, c, sizeof(*r));
123*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "R(p)") == 0 || strcmp(spec2, "T(p)") == 0) {
124*6dc29354SIlya Leoshkevich memcpy(r, &p, sizeof(*r));
125*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "R(p+c)") == 0 || strcmp(spec2, "T(p+c)") == 0) {
126*6dc29354SIlya Leoshkevich switch (fmt) {
127*6dc29354SIlya Leoshkevich case 0:
128*6dc29354SIlya Leoshkevich r->e = p.e + c->e;
129*6dc29354SIlya Leoshkevich break;
130*6dc29354SIlya Leoshkevich case 1:
131*6dc29354SIlya Leoshkevich r->d = p.d + c->d;
132*6dc29354SIlya Leoshkevich break;
133*6dc29354SIlya Leoshkevich case 2:
134*6dc29354SIlya Leoshkevich r->x = p.x + c->x;
135*6dc29354SIlya Leoshkevich break;
136*6dc29354SIlya Leoshkevich default:
137*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported fmt: %d\n", fmt);
138*6dc29354SIlya Leoshkevich exit(1);
139*6dc29354SIlya Leoshkevich }
140*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "Xi: T(dNaN)") == 0) {
141*6dc29354SIlya Leoshkevich memcpy(r, default_nans[fmt], sizeof(*r));
142*6dc29354SIlya Leoshkevich *xi = true;
143*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "Xi: T(c*)") == 0) {
144*6dc29354SIlya Leoshkevich memcpy(r, c, sizeof(*r));
145*6dc29354SIlya Leoshkevich snan_to_qnan(r->buf, fmt);
146*6dc29354SIlya Leoshkevich *xi = true;
147*6dc29354SIlya Leoshkevich } else {
148*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported spec2: %s\n", spec2);
149*6dc29354SIlya Leoshkevich exit(1);
150*6dc29354SIlya Leoshkevich }
151*6dc29354SIlya Leoshkevich }
152*6dc29354SIlya Leoshkevich
/*
 * Cursor over the test space: one floating-point format plus, for each of
 * the three operands, a class index and a value index within that class.
 * main() loads operand a from slot 0, b from slot 1 and c from slot 2.
 */
struct iter {
    int fmt;     /* current format index */
    int cls[3];  /* per-operand class index */
    int val[3];  /* per-operand value index within its class */
};
158*6dc29354SIlya Leoshkevich
iter_next(struct iter * it)159*6dc29354SIlya Leoshkevich static bool iter_next(struct iter *it)
160*6dc29354SIlya Leoshkevich {
161*6dc29354SIlya Leoshkevich int i;
162*6dc29354SIlya Leoshkevich
163*6dc29354SIlya Leoshkevich for (i = 2; i >= 0; i--) {
164*6dc29354SIlya Leoshkevich if (++it->val[i] != signed_floats[it->fmt][it->cls[i]].n) {
165*6dc29354SIlya Leoshkevich return true;
166*6dc29354SIlya Leoshkevich }
167*6dc29354SIlya Leoshkevich it->val[i] = 0;
168*6dc29354SIlya Leoshkevich
169*6dc29354SIlya Leoshkevich if (++it->cls[i] != N_SIGNED_CLASSES) {
170*6dc29354SIlya Leoshkevich return true;
171*6dc29354SIlya Leoshkevich }
172*6dc29354SIlya Leoshkevich it->cls[i] = 0;
173*6dc29354SIlya Leoshkevich }
174*6dc29354SIlya Leoshkevich
175*6dc29354SIlya Leoshkevich return ++it->fmt != N_FORMATS;
176*6dc29354SIlya Leoshkevich }
177*6dc29354SIlya Leoshkevich
main(void)178*6dc29354SIlya Leoshkevich int main(void)
179*6dc29354SIlya Leoshkevich {
180*6dc29354SIlya Leoshkevich int ret = EXIT_SUCCESS;
181*6dc29354SIlya Leoshkevich struct iter it = {};
182*6dc29354SIlya Leoshkevich
183*6dc29354SIlya Leoshkevich do {
184*6dc29354SIlya Leoshkevich size_t n = float_sizes[it.fmt];
185*6dc29354SIlya Leoshkevich union val a, b, c, exp, res;
186*6dc29354SIlya Leoshkevich bool xi_exp, xi;
187*6dc29354SIlya Leoshkevich
188*6dc29354SIlya Leoshkevich memcpy(&a, signed_floats[it.fmt][it.cls[0]].v[it.val[0]], sizeof(a));
189*6dc29354SIlya Leoshkevich memcpy(&b, signed_floats[it.fmt][it.cls[1]].v[it.val[1]], sizeof(b));
190*6dc29354SIlya Leoshkevich memcpy(&c, signed_floats[it.fmt][it.cls[2]].v[it.val[2]], sizeof(c));
191*6dc29354SIlya Leoshkevich
192*6dc29354SIlya Leoshkevich interpret_tables(&exp, &xi_exp, it.fmt,
193*6dc29354SIlya Leoshkevich it.cls[1], &b, it.cls[2], &c, it.cls[0], &a);
194*6dc29354SIlya Leoshkevich
195*6dc29354SIlya Leoshkevich memcpy(&res, &a, sizeof(res));
196*6dc29354SIlya Leoshkevich feclearexcept(FE_ALL_EXCEPT);
197*6dc29354SIlya Leoshkevich switch (it.fmt) {
198*6dc29354SIlya Leoshkevich case 0:
199*6dc29354SIlya Leoshkevich asm("maebr %[a],%[b],%[c]"
200*6dc29354SIlya Leoshkevich : [a] "+f" (res.e) : [b] "f" (b.e), [c] "f" (c.e));
201*6dc29354SIlya Leoshkevich break;
202*6dc29354SIlya Leoshkevich case 1:
203*6dc29354SIlya Leoshkevich asm("madbr %[a],%[b],%[c]"
204*6dc29354SIlya Leoshkevich : [a] "+f" (res.d) : [b] "f" (b.d), [c] "f" (c.d));
205*6dc29354SIlya Leoshkevich break;
206*6dc29354SIlya Leoshkevich case 2:
207*6dc29354SIlya Leoshkevich asm("wfmaxb %[a],%[c],%[b],%[a]"
208*6dc29354SIlya Leoshkevich : [a] "+v" (res.x) : [b] "v" (b.x), [c] "v" (c.x));
209*6dc29354SIlya Leoshkevich break;
210*6dc29354SIlya Leoshkevich default:
211*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported fmt: %d\n", it.fmt);
212*6dc29354SIlya Leoshkevich exit(1);
213*6dc29354SIlya Leoshkevich }
214*6dc29354SIlya Leoshkevich xi = fetestexcept(FE_ALL_EXCEPT) == FE_INVALID;
215*6dc29354SIlya Leoshkevich
216*6dc29354SIlya Leoshkevich if (memcmp(&res, &exp, n) != 0 || xi != xi_exp) {
217*6dc29354SIlya Leoshkevich fprintf(stderr, "[ FAILED ] ");
218*6dc29354SIlya Leoshkevich dump_v(stderr, &b, n);
219*6dc29354SIlya Leoshkevich fprintf(stderr, " * ");
220*6dc29354SIlya Leoshkevich dump_v(stderr, &c, n);
221*6dc29354SIlya Leoshkevich fprintf(stderr, " + ");
222*6dc29354SIlya Leoshkevich dump_v(stderr, &a, n);
223*6dc29354SIlya Leoshkevich fprintf(stderr, ": actual=");
224*6dc29354SIlya Leoshkevich dump_v(stderr, &res, n);
225*6dc29354SIlya Leoshkevich fprintf(stderr, "/%d, expected=", (int)xi);
226*6dc29354SIlya Leoshkevich dump_v(stderr, &exp, n);
227*6dc29354SIlya Leoshkevich fprintf(stderr, "/%d\n", (int)xi_exp);
228*6dc29354SIlya Leoshkevich ret = EXIT_FAILURE;
229*6dc29354SIlya Leoshkevich }
230*6dc29354SIlya Leoshkevich } while (iter_next(&it));
231*6dc29354SIlya Leoshkevich
232*6dc29354SIlya Leoshkevich return ret;
233*6dc29354SIlya Leoshkevich }
234