1 #define _GNU_SOURCE 2 #include <fenv.h> 3 #include <stdbool.h> 4 #include <stdio.h> 5 #include <string.h> 6 7 #include "float.h" 8 9 /* 10 * vfmin/vfmax instruction execution. 11 */ 12 #define VFMIN 0xEE 13 #define VFMAX 0xEF 14 15 extern char insn[6]; 16 asm(".pushsection .rwx,\"awx\",@progbits\n" 17 ".globl insn\n" 18 /* e7 89 a0 00 2e ef */ 19 "insn: vfmaxsb %v24,%v25,%v26,0\n" 20 ".popsection\n"); 21 22 static void vfminmax(unsigned int op, 23 unsigned int m4, unsigned int m5, unsigned int m6, 24 void *v1, const void *v2, const void *v3) 25 { 26 insn[3] = (m6 << 4) | m5; 27 insn[4] = (m4 << 4) | 0x0e; 28 insn[5] = op; 29 30 asm("vl %%v25,%[v2]\n" 31 "vl %%v26,%[v3]\n" 32 "ex 0,%[insn]\n" 33 "vst %%v24,%[v1]\n" 34 : [v1] "=m" (*(char (*)[16])v1) 35 : [v2] "m" (*(const char (*)[16])v2) 36 , [v3] "m" (*(const char (*)[16])v3) 37 , [insn] "m" (insn) 38 : "v24", "v25", "v26"); 39 } 40 41 /* 42 * PoP tables as close to the original as possible. 43 */ 44 struct signed_test { 45 int op; 46 int m6; 47 const char *m6_desc; 48 const char *table[N_SIGNED_CLASSES][N_SIGNED_CLASSES]; 49 } signed_tests[] = { 50 { 51 .op = VFMIN, 52 .m6 = 0, 53 .m6_desc = "IEEE MinNum", 54 .table = { 55 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 56 {/* -inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"}, 57 {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"}, 58 {/* -0 */ "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"}, 59 {/* +0 */ "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"}, 60 {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "T(a)", "Xi: T(b*)"}, 61 {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "Xi: T(b*)"}, 62 {/* QNaN */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"}, 63 {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"}, 64 }, 65 }, 66 { 67 .op = VFMIN, 68 .m6 = 1, 69 .m6_desc = "JAVA Math.Min()", 70 .table = { 71 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 72 {/* -inf */ "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"}, 73 {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"}, 74 {/* -0 */ "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"}, 75 {/* +0 */ "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"}, 76 {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "T(b)", "Xi: T(b*)"}, 77 {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"}, 78 {/* QNaN */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"}, 79 {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"}, 80 }, 81 }, 82 { 83 .op = VFMIN, 84 .m6 = 2, 85 .m6_desc = "C-style Min Macro", 86 .table = { 87 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 88 {/* -inf */ "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b)", "Xi: T(b)"}, 89 {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b)", "Xi: T(b)"}, 90 {/* -0 */ "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "Xi: T(b)", "Xi: T(b)"}, 91 {/* +0 */ "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "Xi: T(b)", "Xi: T(b)"}, 92 {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "Xi: T(b)", "Xi: T(b)"}, 93 {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b)", "Xi: T(b)"}, 94 {/* QNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)"}, 95 {/* SNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)"}, 96 }, 97 }, 98 { 99 .op = VFMIN, 100 .m6 = 3, 101 .m6_desc = "C++ algorithm.min()", 102 .table = { 103 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 104 {/* -inf */ "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"}, 105 {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"}, 106 {/* -0 */ "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"}, 107 {/* +0 */ "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"}, 108 {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "Xi: T(a)", "Xi: T(a)"}, 109 {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)", "Xi: T(a)"}, 110 {/* QNaN */ "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)"}, 111 {/* SNaN */ "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)"}, 112 }, 113 }, 114 { 115 .op = VFMIN, 116 .m6 = 4, 117 .m6_desc = "fmin()", 118 .table = { 119 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 120 {/* -inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"}, 121 {/* -Fn */ "T(b)", "T(M(a,b))", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"}, 122 {/* -0 */ "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"}, 123 {/* +0 */ "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"}, 124 {/* +Fn */ "T(b)", "T(b)", "T(b)", "T(b)", "T(M(a,b))", "T(a)", "T(a)", "Xi: T(a)"}, 125 {/* +inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "T(a)", "Xi: T(a)"}, 126 {/* QNaN */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, 127 {/* SNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(a)", "Xi: T(a)"}, 128 }, 129 }, 130 131 { 132 .op = VFMAX, 133 .m6 = 0, 134 .m6_desc = "IEEE MaxNum", 135 .table = { 136 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 137 {/* -inf */ "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"}, 138 {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"}, 139 {/* -0 */ "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"}, 140 {/* +0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"}, 141 {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "T(a)", "Xi: T(b*)"}, 142 {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"}, 143 {/* QNaN */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(b*)"}, 144 {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"}, 145 }, 146 }, 147 { 148 .op = VFMAX, 149 .m6 = 1, 150 .m6_desc = "JAVA Math.Max()", 151 .table = { 152 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 153 {/* -inf */ "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"}, 154 {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"}, 155 {/* -0 */ "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"}, 156 {/* +0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "Xi: T(b*)"}, 157 {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "T(b)", "Xi: T(b*)"}, 158 {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b*)"}, 159 {/* QNaN */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(b*)"}, 160 {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"}, 161 }, 162 }, 163 { 164 .op = VFMAX, 165 .m6 = 2, 166 .m6_desc = "C-style Max Macro", 167 .table = { 168 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 169 {/* -inf */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, 170 {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, 171 {/* -0 */ "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, 172 {/* +0 */ "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, 173 {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "Xi: T(b)", "Xi: T(b)"}, 174 {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "Xi: T(b)", "Xi: T(b)"}, 175 {/* QNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)"}, 176 {/* SNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)"}, 177 }, 178 }, 179 { 180 .op = VFMAX, 181 .m6 = 3, 182 .m6_desc = "C++ algorithm.max()", 183 .table = { 184 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 185 {/* -inf */ "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(a)", "Xi: T(a)"}, 186 {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "Xi: T(a)", "Xi: T(a)"}, 187 {/* -0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "Xi: T(a)", "Xi: T(a)"}, 188 {/* +0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "Xi: T(a)", "Xi: T(a)"}, 189 {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "Xi: T(a)", "Xi: T(a)"}, 190 {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)", "Xi: T(a)"}, 191 {/* QNaN */ "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)"}, 192 {/* SNaN */ "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)", "Xi: T(a)"}, 193 }, 194 }, 195 { 196 .op = VFMAX, 197 .m6 = 4, 198 .m6_desc = "fmax()", 199 .table = { 200 /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 201 {/* -inf */ "T(a)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, 202 {/* -Fn */ "T(a)", "T(M(a,b))", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, 203 {/* -0 */ "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, 204 {/* +0 */ "T(a)", "T(a)", "T(a)", "T(a)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, 205 {/* +Fn */ "T(a)", "T(a)", "T(a)", "T(a)", "T(M(a,b))", "T(b)", "T(a)", "Xi: T(a)"}, 206 {/* +inf */ "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "T(a)", "Xi: T(a)"}, 207 {/* QNaN */ "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(b)", "T(a)", "Xi: T(a)"}, 208 {/* SNaN */ "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(b)", "Xi: T(a)", "Xi: T(a)"}, 209 }, 210 }, 211 }; 212 213 static int signed_test(struct signed_test *test, int m4, int m5, 214 const void *v1_exp, bool xi_exp, 215 const void *v2, const void *v3) 216 { 217 size_t n = (m5 & 8) ? float_sizes[m4 - 2] : 16; 218 char v1[16]; 219 bool xi; 220 221 feclearexcept(FE_ALL_EXCEPT); 222 vfminmax(test->op, m4, m5, test->m6, v1, v2, v3); 223 xi = fetestexcept(FE_ALL_EXCEPT) == FE_INVALID; 224 225 if (memcmp(v1, v1_exp, n) != 0 || xi != xi_exp) { 226 fprintf(stderr, "[ FAILED ] %s ", test->m6_desc); 227 dump_v(stderr, v2, n); 228 fprintf(stderr, ", "); 229 dump_v(stderr, v3, n); 230 fprintf(stderr, ", %d, %d, %d: actual=", m4, m5, test->m6); 231 dump_v(stderr, v1, n); 232 fprintf(stderr, "/%d, expected=", (int)xi); 233 dump_v(stderr, v1_exp, n); 234 fprintf(stderr, "/%d\n", (int)xi_exp); 235 return 1; 236 } 237 238 return 0; 239 } 240 241 struct iter { 242 int cls[2]; 243 int val[2]; 244 }; 245 246 static bool iter_next(struct iter *it, int fmt) 247 { 248 int i; 249 250 for (i = 1; i >= 0; i--) { 251 if (++it->val[i] != signed_floats[fmt][it->cls[i]].n) { 252 return true; 253 } 254 it->val[i] = 0; 255 256 if (++it->cls[i] != N_SIGNED_CLASSES) { 257 return true; 258 } 259 it->cls[i] = 0; 260 } 261 262 return false; 263 } 264 265 int main(void) 266 { 267 int ret = 0; 268 size_t i; 269 270 for (i = 0; i < sizeof(signed_tests) / sizeof(signed_tests[0]); i++) { 271 struct signed_test *test = &signed_tests[i]; 272 int fmt; 273 274 for (fmt = 0; fmt < N_FORMATS; fmt++) { 275 size_t float_size = float_sizes[fmt]; 276 int m4 = fmt + 2; 277 int m5; 278 279 for (m5 = 0; m5 <= 8; m5 += 8) { 280 char v1_exp[16], v2[16], v3[16]; 281 bool xi_exp = false; 282 struct iter it = {}; 283 int pos = 0; 284 285 do { 286 const char *spec = test->table[it.cls[0]][it.cls[1]]; 287 288 memcpy(&v2[pos], 289 signed_floats[fmt][it.cls[0]].v[it.val[0]], 290 float_size); 291 memcpy(&v3[pos], 292 signed_floats[fmt][it.cls[1]].v[it.val[1]], 293 float_size); 294 if (strcmp(spec, "T(a)") == 0 || 295 strcmp(spec, "Xi: T(a)") == 0) { 296 memcpy(&v1_exp[pos], &v2[pos], float_size); 297 } else if (strcmp(spec, "T(b)") == 0 || 298 strcmp(spec, "Xi: T(b)") == 0) { 299 memcpy(&v1_exp[pos], &v3[pos], float_size); 300 } else if (strcmp(spec, "Xi: T(a*)") == 0) { 301 memcpy(&v1_exp[pos], &v2[pos], float_size); 302 snan_to_qnan(&v1_exp[pos], fmt); 303 } else if (strcmp(spec, "Xi: T(b*)") == 0) { 304 memcpy(&v1_exp[pos], &v3[pos], float_size); 305 snan_to_qnan(&v1_exp[pos], fmt); 306 } else if (strcmp(spec, "T(M(a,b))") == 0) { 307 /* 308 * Comparing floats is risky, since the compiler might 309 * generate the same instruction that we are testing. 310 * Compare ints instead. This works, because we get 311 * here only for +-Fn, and the corresponding test 312 * values have identical exponents. 313 */ 314 int v2_int = *(int *)&v2[pos]; 315 int v3_int = *(int *)&v3[pos]; 316 317 if ((v2_int < v3_int) == 318 ((test->op == VFMIN) != (v2_int < 0))) { 319 memcpy(&v1_exp[pos], &v2[pos], float_size); 320 } else { 321 memcpy(&v1_exp[pos], &v3[pos], float_size); 322 } 323 } else { 324 fprintf(stderr, "Unexpected spec: %s\n", spec); 325 return 1; 326 } 327 xi_exp |= spec[0] == 'X'; 328 pos += float_size; 329 330 if ((m5 & 8) || pos == 16) { 331 ret |= signed_test(test, m4, m5, 332 v1_exp, xi_exp, v2, v3); 333 pos = 0; 334 xi_exp = false; 335 } 336 } while (iter_next(&it, fmt)); 337 338 if (pos != 0) { 339 ret |= signed_test(test, m4, m5, v1_exp, xi_exp, v2, v3); 340 } 341 } 342 } 343 } 344 345 return ret; 346 } 347