1 /* 2 * Test Floating Point Conversion 3 */ 4 5 /* we want additional float type definitions */ 6 #define __STDC_WANT_IEC_60559_BFP_EXT__ 7 #define __STDC_WANT_IEC_60559_TYPES_EXT__ 8 9 #include <stdio.h> 10 #include <inttypes.h> 11 #include <math.h> 12 #include <float.h> 13 #include <fenv.h> 14 15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 16 17 static char flag_str[256]; 18 19 static char *get_flag_state(int flags) 20 { 21 if (flags) { 22 snprintf(flag_str, sizeof(flag_str), "%s %s %s %s %s", 23 flags & FE_OVERFLOW ? "OVERFLOW" : "", 24 flags & FE_UNDERFLOW ? "UNDERFLOW" : "", 25 flags & FE_DIVBYZERO ? "DIV0" : "", 26 flags & FE_INEXACT ? "INEXACT" : "", 27 flags & FE_INVALID ? "INVALID" : ""); 28 } else { 29 snprintf(flag_str, sizeof(flag_str), "OK"); 30 } 31 32 return flag_str; 33 } 34 35 static void print_double_number(int i, double num) 36 { 37 uint64_t double_as_hex = *(uint64_t *) # 38 int flags = fetestexcept(FE_ALL_EXCEPT); 39 char *fstr = get_flag_state(flags); 40 41 printf("%02d DOUBLE: %02.20e / %#020" PRIx64 " (%#x => %s)\n", 42 i, num, double_as_hex, flags, fstr); 43 } 44 45 static void print_single_number(int i, float num) 46 { 47 uint32_t single_as_hex = *(uint32_t *) # 48 int flags = fetestexcept(FE_ALL_EXCEPT); 49 char *fstr = get_flag_state(flags); 50 51 printf("%02d SINGLE: %02.20e / %#010x (%#x => %s)\n", 52 i, num, single_as_hex, flags, fstr); 53 } 54 55 static void print_half_number(int i, uint16_t num) 56 { 57 int flags = fetestexcept(FE_ALL_EXCEPT); 58 char *fstr = get_flag_state(flags); 59 60 printf("%02d HALF: %#04x (%#x => %s)\n", 61 i, num, flags, fstr); 62 } 63 64 static void print_int64(int i, int64_t num) 65 { 66 uint64_t int64_as_hex = *(uint64_t *) # 67 int flags = fetestexcept(FE_ALL_EXCEPT); 68 char *fstr = get_flag_state(flags); 69 70 printf("%02d INT64: %20" PRId64 "/%#020" PRIx64 " (%#x => %s)\n", 71 i, num, int64_as_hex, flags, fstr); 72 } 73 74 #ifndef SNANF 75 /* Signaling NaN macros, if supported. */ 76 # define SNANF (__builtin_nansf ("")) 77 # define SNAN (__builtin_nans ("")) 78 # define SNANL (__builtin_nansl ("")) 79 #endif 80 81 float single_numbers[] = { -SNANF, 82 -NAN, 83 -INFINITY, 84 -FLT_MAX, 85 -1.111E+31, 86 -1.111E+30, 87 -1.08700982e-12, 88 -1.78051176e-20, 89 -FLT_MIN, 90 0.0, 91 FLT_MIN, 92 2.98023224e-08, 93 5.96046E-8, /* min positive FP16 subnormal */ 94 6.09756E-5, /* max subnormal FP16 */ 95 6.10352E-5, /* min positive normal FP16 */ 96 1.0, 97 1.0009765625, /* smallest float after 1.0 FP16 */ 98 2.0, 99 M_E, M_PI, 100 65503.0, 101 65504.0, /* max FP16 */ 102 65505.0, 103 131007.0, 104 131008.0, /* max AFP */ 105 131009.0, 106 1.111E+30, 107 FLT_MAX, 108 INFINITY, 109 NAN, 110 SNANF }; 111 112 static void convert_single_to_half(void) 113 { 114 int i; 115 116 printf("Converting single-precision to half-precision\n"); 117 118 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { 119 float input = single_numbers[i]; 120 121 feclearexcept(FE_ALL_EXCEPT); 122 123 print_single_number(i, input); 124 #if defined(__arm__) 125 uint32_t output; 126 asm("vcvtb.f16.f32 %0, %1" : "=t" (output) : "x" (input)); 127 #else 128 uint16_t output; 129 asm("fcvt %h0, %s1" : "=w" (output) : "w" (input)); 130 #endif 131 print_half_number(i, output); 132 } 133 } 134 135 static void convert_single_to_double(void) 136 { 137 int i; 138 139 printf("Converting single-precision to double-precision\n"); 140 141 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { 142 float input = single_numbers[i]; 143 /* uint64_t output; */ 144 double output; 145 146 feclearexcept(FE_ALL_EXCEPT); 147 148 print_single_number(i, input); 149 #if defined(__arm__) 150 asm("vcvt.f64.f32 %P0, %1" : "=w" (output) : "t" (input)); 151 #else 152 asm("fcvt %d0, %s1" : "=w" (output) : "w" (input)); 153 #endif 154 print_double_number(i, output); 155 } 156 } 157 158 static void convert_single_to_integer(void) 159 { 160 int i; 161 162 printf("Converting single-precision to integer\n"); 163 164 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { 165 float input = single_numbers[i]; 166 int64_t output; 167 168 feclearexcept(FE_ALL_EXCEPT); 169 170 print_single_number(i, input); 171 #if defined(__arm__) 172 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */ 173 output = input; 174 #else 175 asm("fcvtzs %0, %s1" : "=r" (output) : "w" (input)); 176 #endif 177 print_int64(i, output); 178 } 179 } 180 181 /* This allows us to initialise some doubles as pure hex */ 182 typedef union { 183 double d; 184 uint64_t h; 185 } test_doubles; 186 187 test_doubles double_numbers[] = { 188 {SNAN}, 189 {-NAN}, 190 {-INFINITY}, 191 {-DBL_MAX}, 192 {-FLT_MAX-1.0}, 193 {-FLT_MAX}, 194 {-1.111E+31}, 195 {-1.111E+30}, /* half prec */ 196 {-2.0}, {-1.0}, 197 {-DBL_MIN}, 198 {-FLT_MIN}, 199 {0.0}, 200 {FLT_MIN}, 201 {2.98023224e-08}, 202 {5.96046E-8}, /* min positive FP16 subnormal */ 203 {6.09756E-5}, /* max subnormal FP16 */ 204 {6.10352E-5}, /* min positive normal FP16 */ 205 {1.0}, 206 {1.0009765625}, /* smallest float after 1.0 FP16 */ 207 {DBL_MIN}, 208 {1.3789972848607228e-308}, 209 {1.4914738736681624e-308}, 210 {1.0}, {2.0}, 211 {M_E}, {M_PI}, 212 {65503.0}, 213 {65504.0}, /* max FP16 */ 214 {65505.0}, 215 {131007.0}, 216 {131008.0}, /* max AFP */ 217 {131009.0}, 218 {.h = 0x41dfffffffc00000 }, /* to int = 0x7fffffff */ 219 {FLT_MAX}, 220 {FLT_MAX + 1.0}, 221 {DBL_MAX}, 222 {INFINITY}, 223 {NAN}, 224 {.h = 0x7ff0000000000001}, /* SNAN */ 225 {SNAN}, 226 }; 227 228 static void convert_double_to_half(void) 229 { 230 int i; 231 232 printf("Converting double-precision to half-precision\n"); 233 234 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { 235 double input = double_numbers[i].d; 236 uint16_t output; 237 238 feclearexcept(FE_ALL_EXCEPT); 239 240 print_double_number(i, input); 241 242 /* as we don't have _Float16 support */ 243 #if defined(__arm__) 244 /* asm("vcvtb.f16.f64 %0, %P1" : "=t" (output) : "x" (input)); */ 245 output = input; 246 #else 247 asm("fcvt %h0, %d1" : "=w" (output) : "w" (input)); 248 #endif 249 print_half_number(i, output); 250 } 251 } 252 253 static void convert_double_to_single(void) 254 { 255 int i; 256 257 printf("Converting double-precision to single-precision\n"); 258 259 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { 260 double input = double_numbers[i].d; 261 float output; 262 263 feclearexcept(FE_ALL_EXCEPT); 264 265 print_double_number(i, input); 266 267 #if defined(__arm__) 268 asm("vcvt.f32.f64 %0, %P1" : "=w" (output) : "x" (input)); 269 #else 270 asm("fcvt %s0, %d1" : "=w" (output) : "w" (input)); 271 #endif 272 273 print_single_number(i, output); 274 } 275 } 276 277 static void convert_double_to_integer(void) 278 { 279 int i; 280 281 printf("Converting double-precision to integer\n"); 282 283 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { 284 double input = double_numbers[i].d; 285 int64_t output; 286 287 feclearexcept(FE_ALL_EXCEPT); 288 289 print_double_number(i, input); 290 #if defined(__arm__) 291 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */ 292 output = input; 293 #else 294 asm("fcvtzs %0, %d1" : "=r" (output) : "w" (input)); 295 #endif 296 print_int64(i, output); 297 } 298 } 299 300 /* no handy defines for these numbers */ 301 uint16_t half_numbers[] = { 302 0xffff, /* -NaN / AHP -Max */ 303 0xfcff, /* -NaN / AHP */ 304 0xfc01, /* -NaN / AHP */ 305 0xfc00, /* -Inf */ 306 0xfbff, /* -Max */ 307 0xc000, /* -2 */ 308 0xbc00, /* -1 */ 309 0x8001, /* -MIN subnormal */ 310 0x8000, /* -0 */ 311 0x0000, /* +0 */ 312 0x0001, /* MIN subnormal */ 313 0x3c00, /* 1 */ 314 0x7bff, /* Max */ 315 0x7c00, /* Inf */ 316 0x7c01, /* NaN / AHP */ 317 0x7cff, /* NaN / AHP */ 318 0x7fff, /* NaN / AHP +Max*/ 319 }; 320 321 static void convert_half_to_double(void) 322 { 323 int i; 324 325 printf("Converting half-precision to double-precision\n"); 326 327 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { 328 uint16_t input = half_numbers[i]; 329 double output; 330 331 feclearexcept(FE_ALL_EXCEPT); 332 333 print_half_number(i, input); 334 #if defined(__arm__) 335 /* asm("vcvtb.f64.f16 %P0, %1" : "=w" (output) : "t" (input)); */ 336 output = input; 337 #else 338 asm("fcvt %d0, %h1" : "=w" (output) : "w" (input)); 339 #endif 340 print_double_number(i, output); 341 } 342 } 343 344 static void convert_half_to_single(void) 345 { 346 int i; 347 348 printf("Converting half-precision to single-precision\n"); 349 350 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { 351 uint16_t input = half_numbers[i]; 352 float output; 353 354 feclearexcept(FE_ALL_EXCEPT); 355 356 print_half_number(i, input); 357 #if defined(__arm__) 358 /* 359 * Clang refuses to allocate an integer to a fp register. 360 * Perform the move from a general register by hand. 361 */ 362 asm("vmov %0, %1\n\t" 363 "vcvtb.f32.f16 %0, %0" : "=w" (output) : "r" (input)); 364 #else 365 asm("fcvt %s0, %h1" : "=w" (output) : "w" (input)); 366 #endif 367 print_single_number(i, output); 368 } 369 } 370 371 static void convert_half_to_integer(void) 372 { 373 int i; 374 375 printf("Converting half-precision to integer\n"); 376 377 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { 378 uint16_t input = half_numbers[i]; 379 int64_t output; 380 381 feclearexcept(FE_ALL_EXCEPT); 382 383 print_half_number(i, input); 384 #if defined(__arm__) 385 /* asm("vcvt.s32.f16 %0, %1" : "=t" (output) : "t" (input)); v8.2*/ 386 output = input; 387 #else 388 asm("fcvt %s0, %h1" : "=w" (output) : "w" (input)); 389 #endif 390 print_int64(i, output); 391 } 392 } 393 394 typedef struct { 395 int flag; 396 char *desc; 397 } float_mapping; 398 399 float_mapping round_flags[] = { 400 { FE_TONEAREST, "to nearest" }, 401 { FE_UPWARD, "upwards" }, 402 { FE_DOWNWARD, "downwards" }, 403 { FE_TOWARDZERO, "to zero" } 404 }; 405 406 int main(int argc, char *argv[argc]) 407 { 408 int i; 409 410 printf("#### Enabling IEEE Half Precision\n"); 411 412 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) { 413 fesetround(round_flags[i].flag); 414 printf("### Rounding %s\n", round_flags[i].desc); 415 convert_single_to_half(); 416 convert_single_to_double(); 417 convert_double_to_half(); 418 convert_double_to_single(); 419 convert_half_to_single(); 420 convert_half_to_double(); 421 } 422 423 /* convert to integer */ 424 convert_single_to_integer(); 425 convert_double_to_integer(); 426 convert_half_to_integer(); 427 428 /* And now with ARM alternative FP16 */ 429 #if defined(__arm__) 430 asm("vmrs r1, fpscr\n\t" 431 "orr r1, r1, %[flags]\n\t" 432 "vmsr fpscr, r1" 433 : /* no output */ : [flags] "n" (1 << 26) : "r1" ); 434 #else 435 asm("mrs x1, fpcr\n\t" 436 "orr x1, x1, %[flags]\n\t" 437 "msr fpcr, x1\n\t" 438 : /* no output */ : [flags] "n" (1 << 26) : "x1" ); 439 #endif 440 441 printf("#### Enabling ARM Alternative Half Precision\n"); 442 443 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) { 444 fesetround(round_flags[i].flag); 445 printf("### Rounding %s\n", round_flags[i].desc); 446 convert_single_to_half(); 447 convert_single_to_double(); 448 convert_double_to_half(); 449 convert_double_to_single(); 450 convert_half_to_single(); 451 convert_half_to_double(); 452 } 453 454 /* convert to integer */ 455 convert_single_to_integer(); 456 convert_double_to_integer(); 457 convert_half_to_integer(); 458 459 return 0; 460 } 461