1 /* 2 * Test Floating Point Conversion 3 */ 4 5 /* we want additional float type definitions */ 6 #define __STDC_WANT_IEC_60559_BFP_EXT__ 7 #define __STDC_WANT_IEC_60559_TYPES_EXT__ 8 9 #include <stdio.h> 10 #include <inttypes.h> 11 #include <math.h> 12 #include <float.h> 13 #include <fenv.h> 14 15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 16 17 static char flag_str[256]; 18 19 static char *get_flag_state(int flags) 20 { 21 if (flags) { 22 snprintf(flag_str, sizeof(flag_str), "%s %s %s %s %s", 23 flags & FE_OVERFLOW ? "OVERFLOW" : "", 24 flags & FE_UNDERFLOW ? "UNDERFLOW" : "", 25 flags & FE_DIVBYZERO ? "DIV0" : "", 26 flags & FE_INEXACT ? "INEXACT" : "", 27 flags & FE_INVALID ? "INVALID" : ""); 28 } else { 29 snprintf(flag_str, sizeof(flag_str), "OK"); 30 } 31 32 return flag_str; 33 } 34 35 static void print_double_number(int i, double num) 36 { 37 uint64_t double_as_hex = *(uint64_t *) # 38 int flags = fetestexcept(FE_ALL_EXCEPT); 39 char *fstr = get_flag_state(flags); 40 41 printf("%02d DOUBLE: %02.20e / %#020" PRIx64 " (%#x => %s)\n", 42 i, num, double_as_hex, flags, fstr); 43 } 44 45 static void print_single_number(int i, float num) 46 { 47 uint32_t single_as_hex = *(uint32_t *) # 48 int flags = fetestexcept(FE_ALL_EXCEPT); 49 char *fstr = get_flag_state(flags); 50 51 printf("%02d SINGLE: %02.20e / %#010x (%#x => %s)\n", 52 i, num, single_as_hex, flags, fstr); 53 } 54 55 static void print_half_number(int i, uint16_t num) 56 { 57 int flags = fetestexcept(FE_ALL_EXCEPT); 58 char *fstr = get_flag_state(flags); 59 60 printf("%02d HALF: %#04x (%#x => %s)\n", 61 i, num, flags, fstr); 62 } 63 64 static void print_int64(int i, int64_t num) 65 { 66 uint64_t int64_as_hex = *(uint64_t *) # 67 int flags = fetestexcept(FE_ALL_EXCEPT); 68 char *fstr = get_flag_state(flags); 69 70 printf("%02d INT64: %20" PRId64 "/%#020" PRIx64 " (%#x => %s)\n", 71 i, num, int64_as_hex, flags, fstr); 72 } 73 74 #ifndef SNANF 75 /* Signaling NaN macros, if supported. */ 76 # if __GNUC_PREREQ(3, 3) 77 # define SNANF (__builtin_nansf ("")) 78 # define SNAN (__builtin_nans ("")) 79 # define SNANL (__builtin_nansl ("")) 80 # endif 81 #endif 82 83 float single_numbers[] = { -SNANF, 84 -NAN, 85 -INFINITY, 86 -FLT_MAX, 87 -1.111E+31, 88 -1.111E+30, 89 -1.08700982e-12, 90 -1.78051176e-20, 91 -FLT_MIN, 92 0.0, 93 FLT_MIN, 94 2.98023224e-08, 95 5.96046E-8, /* min positive FP16 subnormal */ 96 6.09756E-5, /* max subnormal FP16 */ 97 6.10352E-5, /* min positive normal FP16 */ 98 1.0, 99 1.0009765625, /* smallest float after 1.0 FP16 */ 100 2.0, 101 M_E, M_PI, 102 65503.0, 103 65504.0, /* max FP16 */ 104 65505.0, 105 131007.0, 106 131008.0, /* max AFP */ 107 131009.0, 108 1.111E+30, 109 FLT_MAX, 110 INFINITY, 111 NAN, 112 SNANF }; 113 114 static void convert_single_to_half(void) 115 { 116 int i; 117 118 printf("Converting single-precision to half-precision\n"); 119 120 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { 121 float input = single_numbers[i]; 122 123 feclearexcept(FE_ALL_EXCEPT); 124 125 print_single_number(i, input); 126 #if defined(__arm__) 127 uint32_t output; 128 asm("vcvtb.f16.f32 %0, %1" : "=t" (output) : "x" (input)); 129 #else 130 uint16_t output; 131 asm("fcvt %h0, %s1" : "=w" (output) : "x" (input)); 132 #endif 133 print_half_number(i, output); 134 } 135 } 136 137 static void convert_single_to_double(void) 138 { 139 int i; 140 141 printf("Converting single-precision to double-precision\n"); 142 143 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { 144 float input = single_numbers[i]; 145 /* uint64_t output; */ 146 double output; 147 148 feclearexcept(FE_ALL_EXCEPT); 149 150 print_single_number(i, input); 151 #if defined(__arm__) 152 asm("vcvt.f64.f32 %P0, %1" : "=w" (output) : "t" (input)); 153 #else 154 asm("fcvt %d0, %s1" : "=w" (output) : "x" (input)); 155 #endif 156 print_double_number(i, output); 157 } 158 } 159 160 static void convert_single_to_integer(void) 161 { 162 int i; 163 164 printf("Converting single-precision to integer\n"); 165 166 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { 167 float input = single_numbers[i]; 168 int64_t output; 169 170 feclearexcept(FE_ALL_EXCEPT); 171 172 print_single_number(i, input); 173 #if defined(__arm__) 174 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */ 175 output = input; 176 #else 177 asm("fcvtzs %0, %s1" : "=r" (output) : "w" (input)); 178 #endif 179 print_int64(i, output); 180 } 181 } 182 183 /* This allows us to initialise some doubles as pure hex */ 184 typedef union { 185 double d; 186 uint64_t h; 187 } test_doubles; 188 189 test_doubles double_numbers[] = { 190 {SNAN}, 191 {-NAN}, 192 {-INFINITY}, 193 {-DBL_MAX}, 194 {-FLT_MAX-1.0}, 195 {-FLT_MAX}, 196 {-1.111E+31}, 197 {-1.111E+30}, /* half prec */ 198 {-2.0}, {-1.0}, 199 {-DBL_MIN}, 200 {-FLT_MIN}, 201 {0.0}, 202 {FLT_MIN}, 203 {2.98023224e-08}, 204 {5.96046E-8}, /* min positive FP16 subnormal */ 205 {6.09756E-5}, /* max subnormal FP16 */ 206 {6.10352E-5}, /* min positive normal FP16 */ 207 {1.0}, 208 {1.0009765625}, /* smallest float after 1.0 FP16 */ 209 {DBL_MIN}, 210 {1.3789972848607228e-308}, 211 {1.4914738736681624e-308}, 212 {1.0}, {2.0}, 213 {M_E}, {M_PI}, 214 {65503.0}, 215 {65504.0}, /* max FP16 */ 216 {65505.0}, 217 {131007.0}, 218 {131008.0}, /* max AFP */ 219 {131009.0}, 220 {.h = 0x41dfffffffc00000 }, /* to int = 0x7fffffff */ 221 {FLT_MAX}, 222 {FLT_MAX + 1.0}, 223 {DBL_MAX}, 224 {INFINITY}, 225 {NAN}, 226 {.h = 0x7ff0000000000001}, /* SNAN */ 227 {SNAN}, 228 }; 229 230 static void convert_double_to_half(void) 231 { 232 int i; 233 234 printf("Converting double-precision to half-precision\n"); 235 236 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { 237 double input = double_numbers[i].d; 238 uint16_t output; 239 240 feclearexcept(FE_ALL_EXCEPT); 241 242 print_double_number(i, input); 243 244 /* as we don't have _Float16 support */ 245 #if defined(__arm__) 246 /* asm("vcvtb.f16.f64 %0, %P1" : "=t" (output) : "x" (input)); */ 247 output = input; 248 #else 249 asm("fcvt %h0, %d1" : "=w" (output) : "x" (input)); 250 #endif 251 print_half_number(i, output); 252 } 253 } 254 255 static void convert_double_to_single(void) 256 { 257 int i; 258 259 printf("Converting double-precision to single-precision\n"); 260 261 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { 262 double input = double_numbers[i].d; 263 uint32_t output; 264 265 feclearexcept(FE_ALL_EXCEPT); 266 267 print_double_number(i, input); 268 269 #if defined(__arm__) 270 asm("vcvt.f32.f64 %0, %P1" : "=w" (output) : "x" (input)); 271 #else 272 asm("fcvt %s0, %d1" : "=w" (output) : "x" (input)); 273 #endif 274 275 print_single_number(i, output); 276 } 277 } 278 279 static void convert_double_to_integer(void) 280 { 281 int i; 282 283 printf("Converting double-precision to integer\n"); 284 285 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { 286 double input = double_numbers[i].d; 287 int64_t output; 288 289 feclearexcept(FE_ALL_EXCEPT); 290 291 print_double_number(i, input); 292 #if defined(__arm__) 293 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */ 294 output = input; 295 #else 296 asm("fcvtzs %0, %d1" : "=r" (output) : "w" (input)); 297 #endif 298 print_int64(i, output); 299 } 300 } 301 302 /* no handy defines for these numbers */ 303 uint16_t half_numbers[] = { 304 0xffff, /* -NaN / AHP -Max */ 305 0xfcff, /* -NaN / AHP */ 306 0xfc01, /* -NaN / AHP */ 307 0xfc00, /* -Inf */ 308 0xfbff, /* -Max */ 309 0xc000, /* -2 */ 310 0xbc00, /* -1 */ 311 0x8001, /* -MIN subnormal */ 312 0x8000, /* -0 */ 313 0x0000, /* +0 */ 314 0x0001, /* MIN subnormal */ 315 0x3c00, /* 1 */ 316 0x7bff, /* Max */ 317 0x7c00, /* Inf */ 318 0x7c01, /* NaN / AHP */ 319 0x7cff, /* NaN / AHP */ 320 0x7fff, /* NaN / AHP +Max*/ 321 }; 322 323 static void convert_half_to_double(void) 324 { 325 int i; 326 327 printf("Converting half-precision to double-precision\n"); 328 329 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { 330 uint16_t input = half_numbers[i]; 331 double output; 332 333 feclearexcept(FE_ALL_EXCEPT); 334 335 print_half_number(i, input); 336 #if defined(__arm__) 337 /* asm("vcvtb.f64.f16 %P0, %1" : "=w" (output) : "t" (input)); */ 338 output = input; 339 #else 340 asm("fcvt %d0, %h1" : "=w" (output) : "x" (input)); 341 #endif 342 print_double_number(i, output); 343 } 344 } 345 346 static void convert_half_to_single(void) 347 { 348 int i; 349 350 printf("Converting half-precision to single-precision\n"); 351 352 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { 353 uint16_t input = half_numbers[i]; 354 float output; 355 356 feclearexcept(FE_ALL_EXCEPT); 357 358 print_half_number(i, input); 359 #if defined(__arm__) 360 asm("vcvtb.f32.f16 %0, %1" : "=w" (output) : "x" ((uint32_t)input)); 361 #else 362 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input)); 363 #endif 364 print_single_number(i, output); 365 } 366 } 367 368 static void convert_half_to_integer(void) 369 { 370 int i; 371 372 printf("Converting half-precision to integer\n"); 373 374 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { 375 uint16_t input = half_numbers[i]; 376 int64_t output; 377 378 feclearexcept(FE_ALL_EXCEPT); 379 380 print_half_number(i, input); 381 #if defined(__arm__) 382 /* asm("vcvt.s32.f16 %0, %1" : "=t" (output) : "t" (input)); v8.2*/ 383 output = input; 384 #else 385 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input)); 386 #endif 387 print_int64(i, output); 388 } 389 } 390 391 typedef struct { 392 int flag; 393 char *desc; 394 } float_mapping; 395 396 float_mapping round_flags[] = { 397 { FE_TONEAREST, "to nearest" }, 398 { FE_UPWARD, "upwards" }, 399 { FE_DOWNWARD, "downwards" }, 400 { FE_TOWARDZERO, "to zero" } 401 }; 402 403 int main(int argc, char *argv[argc]) 404 { 405 int i; 406 407 printf("#### Enabling IEEE Half Precision\n"); 408 409 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) { 410 fesetround(round_flags[i].flag); 411 printf("### Rounding %s\n", round_flags[i].desc); 412 convert_single_to_half(); 413 convert_single_to_double(); 414 convert_double_to_half(); 415 convert_double_to_single(); 416 convert_half_to_single(); 417 convert_half_to_double(); 418 } 419 420 /* convert to integer */ 421 convert_single_to_integer(); 422 convert_double_to_integer(); 423 convert_half_to_integer(); 424 425 /* And now with ARM alternative FP16 */ 426 #if defined(__arm__) 427 /* See glibc sysdeps/arm/fpu_control.h */ 428 asm("mrc p10, 7, r1, cr1, cr0, 0\n\t" 429 "orr r1, r1, %[flags]\n\t" 430 "mcr p10, 7, r1, cr1, cr0, 0\n\t" 431 : /* no output */ : [flags] "n" (1 << 26) : "r1" ); 432 #else 433 asm("mrs x1, fpcr\n\t" 434 "orr x1, x1, %[flags]\n\t" 435 "msr fpcr, x1\n\t" 436 : /* no output */ : [flags] "n" (1 << 26) : "x1" ); 437 #endif 438 439 printf("#### Enabling ARM Alternative Half Precision\n"); 440 441 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) { 442 fesetround(round_flags[i].flag); 443 printf("### Rounding %s\n", round_flags[i].desc); 444 convert_single_to_half(); 445 convert_single_to_double(); 446 convert_double_to_half(); 447 convert_double_to_single(); 448 convert_half_to_single(); 449 convert_half_to_double(); 450 } 451 452 /* convert to integer */ 453 convert_single_to_integer(); 454 convert_double_to_integer(); 455 convert_half_to_integer(); 456 457 return 0; 458 } 459