1 /*
2 * Test Floating Point Conversion
3 */
4
5 /* we want additional float type definitions */
6 #define __STDC_WANT_IEC_60559_BFP_EXT__
7 #define __STDC_WANT_IEC_60559_TYPES_EXT__
8
9 #include <stdio.h>
10 #include <inttypes.h>
11 #include <math.h>
12 #include <float.h>
13 #include <fenv.h>
14
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16
17 static char flag_str[256];
18
get_flag_state(int flags)19 static char *get_flag_state(int flags)
20 {
21 if (flags) {
22 snprintf(flag_str, sizeof(flag_str), "%s %s %s %s %s",
23 flags & FE_OVERFLOW ? "OVERFLOW" : "",
24 flags & FE_UNDERFLOW ? "UNDERFLOW" : "",
25 flags & FE_DIVBYZERO ? "DIV0" : "",
26 flags & FE_INEXACT ? "INEXACT" : "",
27 flags & FE_INVALID ? "INVALID" : "");
28 } else {
29 snprintf(flag_str, sizeof(flag_str), "OK");
30 }
31
32 return flag_str;
33 }
34
print_double_number(int i,double num)35 static void print_double_number(int i, double num)
36 {
37 uint64_t double_as_hex = *(uint64_t *) #
38 int flags = fetestexcept(FE_ALL_EXCEPT);
39 char *fstr = get_flag_state(flags);
40
41 printf("%02d DOUBLE: %02.20e / %#020" PRIx64 " (%#x => %s)\n",
42 i, num, double_as_hex, flags, fstr);
43 }
44
print_single_number(int i,float num)45 static void print_single_number(int i, float num)
46 {
47 uint32_t single_as_hex = *(uint32_t *) #
48 int flags = fetestexcept(FE_ALL_EXCEPT);
49 char *fstr = get_flag_state(flags);
50
51 printf("%02d SINGLE: %02.20e / %#010x (%#x => %s)\n",
52 i, num, single_as_hex, flags, fstr);
53 }
54
print_half_number(int i,uint16_t num)55 static void print_half_number(int i, uint16_t num)
56 {
57 int flags = fetestexcept(FE_ALL_EXCEPT);
58 char *fstr = get_flag_state(flags);
59
60 printf("%02d HALF: %#04x (%#x => %s)\n",
61 i, num, flags, fstr);
62 }
63
print_int64(int i,int64_t num)64 static void print_int64(int i, int64_t num)
65 {
66 uint64_t int64_as_hex = *(uint64_t *) #
67 int flags = fetestexcept(FE_ALL_EXCEPT);
68 char *fstr = get_flag_state(flags);
69
70 printf("%02d INT64: %20" PRId64 "/%#020" PRIx64 " (%#x => %s)\n",
71 i, num, int64_as_hex, flags, fstr);
72 }
73
74 #ifndef SNANF
75 /* Signaling NaN macros, if supported. */
76 # define SNANF (__builtin_nansf (""))
77 # define SNAN (__builtin_nans (""))
78 # define SNANL (__builtin_nansl (""))
79 #endif
80
81 float single_numbers[] = { -SNANF,
82 -NAN,
83 -INFINITY,
84 -FLT_MAX,
85 -1.111E+31,
86 -1.111E+30,
87 -1.08700982e-12,
88 -1.78051176e-20,
89 -FLT_MIN,
90 0.0,
91 FLT_MIN,
92 2.98023224e-08,
93 5.96046E-8, /* min positive FP16 subnormal */
94 6.09756E-5, /* max subnormal FP16 */
95 6.10352E-5, /* min positive normal FP16 */
96 1.0,
97 1.0009765625, /* smallest float after 1.0 FP16 */
98 2.0,
99 M_E, M_PI,
100 65503.0,
101 65504.0, /* max FP16 */
102 65505.0,
103 131007.0,
104 131008.0, /* max AFP */
105 131009.0,
106 1.111E+30,
107 FLT_MAX,
108 INFINITY,
109 NAN,
110 SNANF };
111
convert_single_to_half(void)112 static void convert_single_to_half(void)
113 {
114 int i;
115
116 printf("Converting single-precision to half-precision\n");
117
118 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
119 float input = single_numbers[i];
120
121 feclearexcept(FE_ALL_EXCEPT);
122
123 print_single_number(i, input);
124 #if defined(__arm__)
125 uint32_t output;
126 asm("vcvtb.f16.f32 %0, %1" : "=t" (output) : "x" (input));
127 #else
128 uint16_t output;
129 asm("fcvt %h0, %s1" : "=w" (output) : "w" (input));
130 #endif
131 print_half_number(i, output);
132 }
133 }
134
convert_single_to_double(void)135 static void convert_single_to_double(void)
136 {
137 int i;
138
139 printf("Converting single-precision to double-precision\n");
140
141 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
142 float input = single_numbers[i];
143 /* uint64_t output; */
144 double output;
145
146 feclearexcept(FE_ALL_EXCEPT);
147
148 print_single_number(i, input);
149 #if defined(__arm__)
150 asm("vcvt.f64.f32 %P0, %1" : "=w" (output) : "t" (input));
151 #else
152 asm("fcvt %d0, %s1" : "=w" (output) : "w" (input));
153 #endif
154 print_double_number(i, output);
155 }
156 }
157
convert_single_to_integer(void)158 static void convert_single_to_integer(void)
159 {
160 int i;
161
162 printf("Converting single-precision to integer\n");
163
164 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
165 float input = single_numbers[i];
166 int64_t output;
167
168 feclearexcept(FE_ALL_EXCEPT);
169
170 print_single_number(i, input);
171 #if defined(__arm__)
172 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */
173 output = input;
174 #else
175 asm("fcvtzs %0, %s1" : "=r" (output) : "w" (input));
176 #endif
177 print_int64(i, output);
178 }
179 }
180
181 /* This allows us to initialise some doubles as pure hex */
182 typedef union {
183 double d;
184 uint64_t h;
185 } test_doubles;
186
187 test_doubles double_numbers[] = {
188 {SNAN},
189 {-NAN},
190 {-INFINITY},
191 {-DBL_MAX},
192 {-FLT_MAX-1.0},
193 {-FLT_MAX},
194 {-1.111E+31},
195 {-1.111E+30}, /* half prec */
196 {-2.0}, {-1.0},
197 {-DBL_MIN},
198 {-FLT_MIN},
199 {0.0},
200 {FLT_MIN},
201 {2.98023224e-08},
202 {5.96046E-8}, /* min positive FP16 subnormal */
203 {6.09756E-5}, /* max subnormal FP16 */
204 {6.10352E-5}, /* min positive normal FP16 */
205 {1.0},
206 {1.0009765625}, /* smallest float after 1.0 FP16 */
207 {DBL_MIN},
208 {1.3789972848607228e-308},
209 {1.4914738736681624e-308},
210 {1.0}, {2.0},
211 {M_E}, {M_PI},
212 {65503.0},
213 {65504.0}, /* max FP16 */
214 {65505.0},
215 {131007.0},
216 {131008.0}, /* max AFP */
217 {131009.0},
218 {.h = 0x41dfffffffc00000 }, /* to int = 0x7fffffff */
219 {FLT_MAX},
220 {FLT_MAX + 1.0},
221 {DBL_MAX},
222 {INFINITY},
223 {NAN},
224 {.h = 0x7ff0000000000001}, /* SNAN */
225 {SNAN},
226 };
227
convert_double_to_half(void)228 static void convert_double_to_half(void)
229 {
230 int i;
231
232 printf("Converting double-precision to half-precision\n");
233
234 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
235 double input = double_numbers[i].d;
236 uint16_t output;
237
238 feclearexcept(FE_ALL_EXCEPT);
239
240 print_double_number(i, input);
241
242 /* as we don't have _Float16 support */
243 #if defined(__arm__)
244 /* asm("vcvtb.f16.f64 %0, %P1" : "=t" (output) : "x" (input)); */
245 output = input;
246 #else
247 asm("fcvt %h0, %d1" : "=w" (output) : "w" (input));
248 #endif
249 print_half_number(i, output);
250 }
251 }
252
convert_double_to_single(void)253 static void convert_double_to_single(void)
254 {
255 int i;
256
257 printf("Converting double-precision to single-precision\n");
258
259 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
260 double input = double_numbers[i].d;
261 float output;
262
263 feclearexcept(FE_ALL_EXCEPT);
264
265 print_double_number(i, input);
266
267 #if defined(__arm__)
268 asm("vcvt.f32.f64 %0, %P1" : "=w" (output) : "x" (input));
269 #else
270 asm("fcvt %s0, %d1" : "=w" (output) : "w" (input));
271 #endif
272
273 print_single_number(i, output);
274 }
275 }
276
convert_double_to_integer(void)277 static void convert_double_to_integer(void)
278 {
279 int i;
280
281 printf("Converting double-precision to integer\n");
282
283 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
284 double input = double_numbers[i].d;
285 int64_t output;
286
287 feclearexcept(FE_ALL_EXCEPT);
288
289 print_double_number(i, input);
290 #if defined(__arm__)
291 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */
292 output = input;
293 #else
294 asm("fcvtzs %0, %d1" : "=r" (output) : "w" (input));
295 #endif
296 print_int64(i, output);
297 }
298 }
299
300 /* no handy defines for these numbers */
301 uint16_t half_numbers[] = {
302 0xffff, /* -NaN / AHP -Max */
303 0xfcff, /* -NaN / AHP */
304 0xfc01, /* -NaN / AHP */
305 0xfc00, /* -Inf */
306 0xfbff, /* -Max */
307 0xc000, /* -2 */
308 0xbc00, /* -1 */
309 0x8001, /* -MIN subnormal */
310 0x8000, /* -0 */
311 0x0000, /* +0 */
312 0x0001, /* MIN subnormal */
313 0x3c00, /* 1 */
314 0x7bff, /* Max */
315 0x7c00, /* Inf */
316 0x7c01, /* NaN / AHP */
317 0x7cff, /* NaN / AHP */
318 0x7fff, /* NaN / AHP +Max*/
319 };
320
convert_half_to_double(void)321 static void convert_half_to_double(void)
322 {
323 int i;
324
325 printf("Converting half-precision to double-precision\n");
326
327 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
328 uint16_t input = half_numbers[i];
329 double output;
330
331 feclearexcept(FE_ALL_EXCEPT);
332
333 print_half_number(i, input);
334 #if defined(__arm__)
335 /* asm("vcvtb.f64.f16 %P0, %1" : "=w" (output) : "t" (input)); */
336 output = input;
337 #else
338 asm("fcvt %d0, %h1" : "=w" (output) : "w" (input));
339 #endif
340 print_double_number(i, output);
341 }
342 }
343
convert_half_to_single(void)344 static void convert_half_to_single(void)
345 {
346 int i;
347
348 printf("Converting half-precision to single-precision\n");
349
350 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
351 uint16_t input = half_numbers[i];
352 float output;
353
354 feclearexcept(FE_ALL_EXCEPT);
355
356 print_half_number(i, input);
357 #if defined(__arm__)
358 /*
359 * Clang refuses to allocate an integer to a fp register.
360 * Perform the move from a general register by hand.
361 */
362 asm("vmov %0, %1\n\t"
363 "vcvtb.f32.f16 %0, %0" : "=w" (output) : "r" (input));
364 #else
365 asm("fcvt %s0, %h1" : "=w" (output) : "w" (input));
366 #endif
367 print_single_number(i, output);
368 }
369 }
370
convert_half_to_integer(void)371 static void convert_half_to_integer(void)
372 {
373 int i;
374
375 printf("Converting half-precision to integer\n");
376
377 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
378 uint16_t input = half_numbers[i];
379 int64_t output;
380
381 feclearexcept(FE_ALL_EXCEPT);
382
383 print_half_number(i, input);
384 #if defined(__arm__)
385 /* asm("vcvt.s32.f16 %0, %1" : "=t" (output) : "t" (input)); v8.2*/
386 output = input;
387 #else
388 asm("fcvt %s0, %h1" : "=w" (output) : "w" (input));
389 #endif
390 print_int64(i, output);
391 }
392 }
393
394 typedef struct {
395 int flag;
396 char *desc;
397 } float_mapping;
398
399 float_mapping round_flags[] = {
400 { FE_TONEAREST, "to nearest" },
401 { FE_UPWARD, "upwards" },
402 { FE_DOWNWARD, "downwards" },
403 { FE_TOWARDZERO, "to zero" }
404 };
405
main(int argc,char * argv[argc])406 int main(int argc, char *argv[argc])
407 {
408 int i;
409
410 printf("#### Enabling IEEE Half Precision\n");
411
412 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) {
413 fesetround(round_flags[i].flag);
414 printf("### Rounding %s\n", round_flags[i].desc);
415 convert_single_to_half();
416 convert_single_to_double();
417 convert_double_to_half();
418 convert_double_to_single();
419 convert_half_to_single();
420 convert_half_to_double();
421 }
422
423 /* convert to integer */
424 convert_single_to_integer();
425 convert_double_to_integer();
426 convert_half_to_integer();
427
428 /* And now with ARM alternative FP16 */
429 #if defined(__arm__)
430 asm("vmrs r1, fpscr\n\t"
431 "orr r1, r1, %[flags]\n\t"
432 "vmsr fpscr, r1"
433 : /* no output */ : [flags] "n" (1 << 26) : "r1" );
434 #else
435 asm("mrs x1, fpcr\n\t"
436 "orr x1, x1, %[flags]\n\t"
437 "msr fpcr, x1\n\t"
438 : /* no output */ : [flags] "n" (1 << 26) : "x1" );
439 #endif
440
441 printf("#### Enabling ARM Alternative Half Precision\n");
442
443 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) {
444 fesetround(round_flags[i].flag);
445 printf("### Rounding %s\n", round_flags[i].desc);
446 convert_single_to_half();
447 convert_single_to_double();
448 convert_double_to_half();
449 convert_double_to_single();
450 convert_half_to_single();
451 convert_half_to_double();
452 }
453
454 /* convert to integer */
455 convert_single_to_integer();
456 convert_double_to_integer();
457 convert_half_to_integer();
458
459 return 0;
460 }
461