1 /* 2 * Test Floating Point Conversion 3 */ 4 5 /* we want additional float type definitions */ 6 #define __STDC_WANT_IEC_60559_BFP_EXT__ 7 #define __STDC_WANT_IEC_60559_TYPES_EXT__ 8 9 #include <stdio.h> 10 #include <inttypes.h> 11 #include <math.h> 12 #include <float.h> 13 #include <fenv.h> 14 15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 16 17 static char flag_str[256]; 18 19 static char *get_flag_state(int flags) 20 { 21 if (flags) { 22 snprintf(flag_str, sizeof(flag_str), "%s %s %s %s %s", 23 flags & FE_OVERFLOW ? "OVERFLOW" : "", 24 flags & FE_UNDERFLOW ? "UNDERFLOW" : "", 25 flags & FE_DIVBYZERO ? "DIV0" : "", 26 flags & FE_INEXACT ? "INEXACT" : "", 27 flags & FE_INVALID ? "INVALID" : ""); 28 } else { 29 snprintf(flag_str, sizeof(flag_str), "OK"); 30 } 31 32 return flag_str; 33 } 34 35 static void print_double_number(int i, double num) 36 { 37 uint64_t double_as_hex = *(uint64_t *) # 38 int flags = fetestexcept(FE_ALL_EXCEPT); 39 char *fstr = get_flag_state(flags); 40 41 printf("%02d DOUBLE: %02.20e / %#020" PRIx64 " (%#x => %s)\n", 42 i, num, double_as_hex, flags, fstr); 43 } 44 45 static void print_single_number(int i, float num) 46 { 47 uint32_t single_as_hex = *(uint32_t *) # 48 int flags = fetestexcept(FE_ALL_EXCEPT); 49 char *fstr = get_flag_state(flags); 50 51 printf("%02d SINGLE: %02.20e / %#010x (%#x => %s)\n", 52 i, num, single_as_hex, flags, fstr); 53 } 54 55 static void print_half_number(int i, uint16_t num) 56 { 57 int flags = fetestexcept(FE_ALL_EXCEPT); 58 char *fstr = get_flag_state(flags); 59 60 printf("%02d HALF: %#04x (%#x => %s)\n", 61 i, num, flags, fstr); 62 } 63 64 static void print_int64(int i, int64_t num) 65 { 66 uint64_t int64_as_hex = *(uint64_t *) # 67 int flags = fetestexcept(FE_ALL_EXCEPT); 68 char *fstr = get_flag_state(flags); 69 70 printf("%02d INT64: %20" PRId64 "/%#020" PRIx64 " (%#x => %s)\n", 71 i, num, int64_as_hex, flags, fstr); 72 } 73 74 #ifndef SNANF 75 /* Signaling NaN macros, if supported. */ 76 # define SNANF (__builtin_nansf ("")) 77 # define SNAN (__builtin_nans ("")) 78 # define SNANL (__builtin_nansl ("")) 79 #endif 80 81 float single_numbers[] = { -SNANF, 82 -NAN, 83 -INFINITY, 84 -FLT_MAX, 85 -1.111E+31, 86 -1.111E+30, 87 -1.08700982e-12, 88 -1.78051176e-20, 89 -FLT_MIN, 90 0.0, 91 FLT_MIN, 92 2.98023224e-08, 93 5.96046E-8, /* min positive FP16 subnormal */ 94 6.09756E-5, /* max subnormal FP16 */ 95 6.10352E-5, /* min positive normal FP16 */ 96 1.0, 97 1.0009765625, /* smallest float after 1.0 FP16 */ 98 2.0, 99 M_E, M_PI, 100 65503.0, 101 65504.0, /* max FP16 */ 102 65505.0, 103 131007.0, 104 131008.0, /* max AFP */ 105 131009.0, 106 1.111E+30, 107 FLT_MAX, 108 INFINITY, 109 NAN, 110 SNANF }; 111 112 static void convert_single_to_half(void) 113 { 114 int i; 115 116 printf("Converting single-precision to half-precision\n"); 117 118 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { 119 float input = single_numbers[i]; 120 121 feclearexcept(FE_ALL_EXCEPT); 122 123 print_single_number(i, input); 124 #if defined(__arm__) 125 uint32_t output; 126 asm("vcvtb.f16.f32 %0, %1" : "=t" (output) : "x" (input)); 127 #else 128 uint16_t output; 129 asm("fcvt %h0, %s1" : "=w" (output) : "x" (input)); 130 #endif 131 print_half_number(i, output); 132 } 133 } 134 135 static void convert_single_to_double(void) 136 { 137 int i; 138 139 printf("Converting single-precision to double-precision\n"); 140 141 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { 142 float input = single_numbers[i]; 143 /* uint64_t output; */ 144 double output; 145 146 feclearexcept(FE_ALL_EXCEPT); 147 148 print_single_number(i, input); 149 #if defined(__arm__) 150 asm("vcvt.f64.f32 %P0, %1" : "=w" (output) : "t" (input)); 151 #else 152 asm("fcvt %d0, %s1" : "=w" (output) : "x" (input)); 153 #endif 154 print_double_number(i, output); 155 } 156 } 157 158 static void convert_single_to_integer(void) 159 { 160 int i; 161 162 printf("Converting single-precision to integer\n"); 163 164 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { 165 float input = single_numbers[i]; 166 int64_t output; 167 168 feclearexcept(FE_ALL_EXCEPT); 169 170 print_single_number(i, input); 171 #if defined(__arm__) 172 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */ 173 output = input; 174 #else 175 asm("fcvtzs %0, %s1" : "=r" (output) : "w" (input)); 176 #endif 177 print_int64(i, output); 178 } 179 } 180 181 /* This allows us to initialise some doubles as pure hex */ 182 typedef union { 183 double d; 184 uint64_t h; 185 } test_doubles; 186 187 test_doubles double_numbers[] = { 188 {SNAN}, 189 {-NAN}, 190 {-INFINITY}, 191 {-DBL_MAX}, 192 {-FLT_MAX-1.0}, 193 {-FLT_MAX}, 194 {-1.111E+31}, 195 {-1.111E+30}, /* half prec */ 196 {-2.0}, {-1.0}, 197 {-DBL_MIN}, 198 {-FLT_MIN}, 199 {0.0}, 200 {FLT_MIN}, 201 {2.98023224e-08}, 202 {5.96046E-8}, /* min positive FP16 subnormal */ 203 {6.09756E-5}, /* max subnormal FP16 */ 204 {6.10352E-5}, /* min positive normal FP16 */ 205 {1.0}, 206 {1.0009765625}, /* smallest float after 1.0 FP16 */ 207 {DBL_MIN}, 208 {1.3789972848607228e-308}, 209 {1.4914738736681624e-308}, 210 {1.0}, {2.0}, 211 {M_E}, {M_PI}, 212 {65503.0}, 213 {65504.0}, /* max FP16 */ 214 {65505.0}, 215 {131007.0}, 216 {131008.0}, /* max AFP */ 217 {131009.0}, 218 {.h = 0x41dfffffffc00000 }, /* to int = 0x7fffffff */ 219 {FLT_MAX}, 220 {FLT_MAX + 1.0}, 221 {DBL_MAX}, 222 {INFINITY}, 223 {NAN}, 224 {.h = 0x7ff0000000000001}, /* SNAN */ 225 {SNAN}, 226 }; 227 228 static void convert_double_to_half(void) 229 { 230 int i; 231 232 printf("Converting double-precision to half-precision\n"); 233 234 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { 235 double input = double_numbers[i].d; 236 uint16_t output; 237 238 feclearexcept(FE_ALL_EXCEPT); 239 240 print_double_number(i, input); 241 242 /* as we don't have _Float16 support */ 243 #if defined(__arm__) 244 /* asm("vcvtb.f16.f64 %0, %P1" : "=t" (output) : "x" (input)); */ 245 output = input; 246 #else 247 asm("fcvt %h0, %d1" : "=w" (output) : "x" (input)); 248 #endif 249 print_half_number(i, output); 250 } 251 } 252 253 static void convert_double_to_single(void) 254 { 255 int i; 256 257 printf("Converting double-precision to single-precision\n"); 258 259 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { 260 double input = double_numbers[i].d; 261 uint32_t output; 262 263 feclearexcept(FE_ALL_EXCEPT); 264 265 print_double_number(i, input); 266 267 #if defined(__arm__) 268 asm("vcvt.f32.f64 %0, %P1" : "=w" (output) : "x" (input)); 269 #else 270 asm("fcvt %s0, %d1" : "=w" (output) : "x" (input)); 271 #endif 272 273 print_single_number(i, output); 274 } 275 } 276 277 static void convert_double_to_integer(void) 278 { 279 int i; 280 281 printf("Converting double-precision to integer\n"); 282 283 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { 284 double input = double_numbers[i].d; 285 int64_t output; 286 287 feclearexcept(FE_ALL_EXCEPT); 288 289 print_double_number(i, input); 290 #if defined(__arm__) 291 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */ 292 output = input; 293 #else 294 asm("fcvtzs %0, %d1" : "=r" (output) : "w" (input)); 295 #endif 296 print_int64(i, output); 297 } 298 } 299 300 /* no handy defines for these numbers */ 301 uint16_t half_numbers[] = { 302 0xffff, /* -NaN / AHP -Max */ 303 0xfcff, /* -NaN / AHP */ 304 0xfc01, /* -NaN / AHP */ 305 0xfc00, /* -Inf */ 306 0xfbff, /* -Max */ 307 0xc000, /* -2 */ 308 0xbc00, /* -1 */ 309 0x8001, /* -MIN subnormal */ 310 0x8000, /* -0 */ 311 0x0000, /* +0 */ 312 0x0001, /* MIN subnormal */ 313 0x3c00, /* 1 */ 314 0x7bff, /* Max */ 315 0x7c00, /* Inf */ 316 0x7c01, /* NaN / AHP */ 317 0x7cff, /* NaN / AHP */ 318 0x7fff, /* NaN / AHP +Max*/ 319 }; 320 321 static void convert_half_to_double(void) 322 { 323 int i; 324 325 printf("Converting half-precision to double-precision\n"); 326 327 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { 328 uint16_t input = half_numbers[i]; 329 double output; 330 331 feclearexcept(FE_ALL_EXCEPT); 332 333 print_half_number(i, input); 334 #if defined(__arm__) 335 /* asm("vcvtb.f64.f16 %P0, %1" : "=w" (output) : "t" (input)); */ 336 output = input; 337 #else 338 asm("fcvt %d0, %h1" : "=w" (output) : "x" (input)); 339 #endif 340 print_double_number(i, output); 341 } 342 } 343 344 static void convert_half_to_single(void) 345 { 346 int i; 347 348 printf("Converting half-precision to single-precision\n"); 349 350 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { 351 uint16_t input = half_numbers[i]; 352 float output; 353 354 feclearexcept(FE_ALL_EXCEPT); 355 356 print_half_number(i, input); 357 #if defined(__arm__) 358 asm("vcvtb.f32.f16 %0, %1" : "=w" (output) : "x" ((uint32_t)input)); 359 #else 360 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input)); 361 #endif 362 print_single_number(i, output); 363 } 364 } 365 366 static void convert_half_to_integer(void) 367 { 368 int i; 369 370 printf("Converting half-precision to integer\n"); 371 372 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { 373 uint16_t input = half_numbers[i]; 374 int64_t output; 375 376 feclearexcept(FE_ALL_EXCEPT); 377 378 print_half_number(i, input); 379 #if defined(__arm__) 380 /* asm("vcvt.s32.f16 %0, %1" : "=t" (output) : "t" (input)); v8.2*/ 381 output = input; 382 #else 383 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input)); 384 #endif 385 print_int64(i, output); 386 } 387 } 388 389 typedef struct { 390 int flag; 391 char *desc; 392 } float_mapping; 393 394 float_mapping round_flags[] = { 395 { FE_TONEAREST, "to nearest" }, 396 { FE_UPWARD, "upwards" }, 397 { FE_DOWNWARD, "downwards" }, 398 { FE_TOWARDZERO, "to zero" } 399 }; 400 401 int main(int argc, char *argv[argc]) 402 { 403 int i; 404 405 printf("#### Enabling IEEE Half Precision\n"); 406 407 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) { 408 fesetround(round_flags[i].flag); 409 printf("### Rounding %s\n", round_flags[i].desc); 410 convert_single_to_half(); 411 convert_single_to_double(); 412 convert_double_to_half(); 413 convert_double_to_single(); 414 convert_half_to_single(); 415 convert_half_to_double(); 416 } 417 418 /* convert to integer */ 419 convert_single_to_integer(); 420 convert_double_to_integer(); 421 convert_half_to_integer(); 422 423 /* And now with ARM alternative FP16 */ 424 #if defined(__arm__) 425 /* See glibc sysdeps/arm/fpu_control.h */ 426 asm("mrc p10, 7, r1, cr1, cr0, 0\n\t" 427 "orr r1, r1, %[flags]\n\t" 428 "mcr p10, 7, r1, cr1, cr0, 0\n\t" 429 : /* no output */ : [flags] "n" (1 << 26) : "r1" ); 430 #else 431 asm("mrs x1, fpcr\n\t" 432 "orr x1, x1, %[flags]\n\t" 433 "msr fpcr, x1\n\t" 434 : /* no output */ : [flags] "n" (1 << 26) : "x1" ); 435 #endif 436 437 printf("#### Enabling ARM Alternative Half Precision\n"); 438 439 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) { 440 fesetround(round_flags[i].flag); 441 printf("### Rounding %s\n", round_flags[i].desc); 442 convert_single_to_half(); 443 convert_single_to_double(); 444 convert_double_to_half(); 445 convert_double_to_single(); 446 convert_half_to_single(); 447 convert_half_to_double(); 448 } 449 450 /* convert to integer */ 451 convert_single_to_integer(); 452 convert_double_to_integer(); 453 convert_half_to_integer(); 454 455 return 0; 456 } 457