1 /*
2 * Common Float Helpers
3 *
4 * This contains a series of useful utility routines and a set of
5 * floating point constants useful for exercising the edge cases in
6 * floating point tests.
7 *
8 * Copyright (c) 2019, 2024 Linaro
9 *
10 * SPDX-License-Identifier: GPL-2.0-or-later
11 */
12
13 /* we want additional float type definitions */
14 #define __STDC_WANT_IEC_60559_BFP_EXT__
15 #define __STDC_WANT_IEC_60559_TYPES_EXT__
16
17 #define _GNU_SOURCE
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <inttypes.h>
21 #include <math.h>
22 #include <float.h>
23 #include <fenv.h>
24
25 #include "../float_helpers.h"
26
27 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
28
29 /*
30 * Half Precision Numbers
31 *
32 * Not yet well standardised so we return a plain uint16_t for now.
33 */
34
35 /* no handy defines for these numbers */
/*
 * Table of raw half-precision bit patterns, running from the negative
 * encodings through zero up to the positive ones.
 *
 * Entries tagged "AHP" are NaNs when read as IEEE half precision; the tag
 * presumably marks how the same bits read in an alternative half-precision
 * format (TODO confirm).
 *
 * The table is read-only test data, hence const.
 */
static const uint16_t f16_numbers[] = {
    0xffff, /* -NaN / AHP -Max */
    0xfcff, /* -NaN / AHP */
    0xfc01, /* -NaN / AHP */
    0xfc00, /* -Inf */
    0xfbff, /* -Max */
    0xc000, /* -2 */
    0xbc00, /* -1 */
    0x8001, /* -MIN subnormal */
    0x8000, /* -0 */
    0x0000, /* +0 */
    0x0001, /* MIN subnormal */
    0x3c00, /* 1 */
    0x7bff, /* Max */
    0x7c00, /* Inf */
    0x7c01, /* NaN / AHP */
    0x7cff, /* NaN / AHP */
    0x7fff, /* NaN / AHP +Max */
};

/* Number of entries in f16_numbers */
static const int num_f16 = sizeof(f16_numbers) / sizeof(f16_numbers[0]);

/* Return how many half-precision test values are available. */
int get_num_f16(void)
{
    return num_f16;
}

/*
 * Return half-precision test value number i as a raw bit pattern.
 *
 * The index wraps around the table in both directions, so any int is a
 * valid argument: get_f16(num) is the first entry again and get_f16(-1)
 * is the last one.
 */
uint16_t get_f16(int i)
{
    int idx = i % num_f16;

    /*
     * The result of % takes the sign of the dividend, so a negative i
     * would otherwise index in front of the table.
     */
    if (idx < 0) {
        idx += num_f16;
    }

    return f16_numbers[idx];
}
67
/*
 * Format a raw half-precision bit pattern as "f16(<hex>)".
 *
 * Half-precision values are only ever displayed as hex. The string is
 * allocated by asprintf(), so the caller owns it and releases it with
 * free(). Returns NULL if asprintf() fails.
 */
char *fmt_16(uint16_t num)
{
    char *fmt;

    /* On failure asprintf() leaves fmt unspecified, so never hand it back */
    if (asprintf(&fmt, "f16(%#04x)", (unsigned int)num) < 0) {
        return NULL;
    }

    return fmt;
}
75
76 /*
77 * Single Precision Numbers
78 */
79
/*
 * Signaling NaN macros. Fall back to the compiler builtins for any of them
 * that the headers did not define. Each macro has its own guard so that a
 * partial set coming from the headers is never redefined.
 */
#ifndef SNANF
# define SNANF (__builtin_nansf(""))
#endif
#ifndef SNAN
# define SNAN (__builtin_nans(""))
#endif
#ifndef SNANL
# define SNANL (__builtin_nansl(""))
#endif

/*
 * Table of single-precision test values, running from the negative NaNs
 * through zero up to the positive NaNs. Several entries sit on or next to
 * the limits of the half-precision formats (see the per-entry notes).
 *
 * The table is read-only test data, hence const.
 */
static const float f32_numbers[] = {
    -SNANF,
    -NAN,
    -INFINITY,
    -FLT_MAX,
    -0x1.1874b2p+103,
    -0x1.c0bab6p+99,
    -0x1.31f75p-40,
    -0x1.505444p-66,
    -FLT_MIN,
    0.0,
    FLT_MIN,
    0x1p-25,
    0x1.ffffe6p-25, /* min positive FP16 subnormal */
    0x1.ff801ap-15, /* max subnormal FP16 */
    0x1.00000cp-14, /* min positive normal FP16 */
    1.0,
    0x1.004p+0, /* smallest float after 1.0 FP16 */
    2.0,
    M_E, M_PI,
    0x1.ffbep+15,
    0x1.ffcp+15, /* max FP16 */
    0x1.ffc2p+15,
    0x1.ffbfp+16,
    0x1.ffcp+16, /* max AFP */
    0x1.ffc1p+16,
    0x1.c0bab6p+99,
    FLT_MAX,
    INFINITY,
    NAN,
    SNANF
};

/* Number of entries in f32_numbers */
static const int num_f32 = sizeof(f32_numbers) / sizeof(f32_numbers[0]);

/* Return how many single-precision test values are available. */
int get_num_f32(void)
{
    return num_f32;
}

/*
 * Return single-precision test value number i.
 *
 * The index wraps around the table in both directions, so any int is a
 * valid argument: get_f32(num) is the first entry again and get_f32(-1)
 * is the last one.
 */
float get_f32(int i)
{
    int idx = i % num_f32;

    /*
     * The result of % takes the sign of the dividend, so a negative i
     * would otherwise index in front of the table.
     */
    if (idx < 0) {
        idx += num_f32;
    }

    return f32_numbers[idx];
}
131
/*
 * Format a single-precision value as "f32(<hex float>:<raw bits>)".
 *
 * The first field is the value printed with %a, the second the underlying
 * 32-bit encoding in hex. The string is allocated by asprintf(), so the
 * caller owns it and releases it with free(). Returns NULL if asprintf()
 * fails.
 */
char *fmt_f32(float num)
{
    /*
     * Read the encoding through a union: dereferencing &num as a
     * uint32_t pointer would break the strict aliasing rules.
     */
    union {
        float f;
        uint32_t u;
    } bits = { .f = num };
    char *fmt;

    /* On failure asprintf() leaves fmt unspecified, so never hand it back */
    if (asprintf(&fmt, "f32(%02.20a:%#010" PRIx32 ")", num, bits.u) < 0) {
        return NULL;
    }

    return fmt;
}
139
140
/*
 * A double that can also be initialised from its raw 64-bit encoding,
 * which lets the table below spell some entries as pure hex.
 */
typedef union {
    double d;
    uint64_t h;
} test_doubles;

/*
 * Table of double-precision test values, running from the negative end
 * through zero up to the positive infinities and NaNs. Several entries sit
 * on or next to the limits of the narrower formats (see the per-entry
 * notes).
 *
 * The table is read-only test data, hence const.
 */
static const test_doubles f64_numbers[] = {
    {SNAN}, /* NOTE(review): positive, unlike -SNANF in the f32 table */
    {-NAN},
    {-INFINITY},
    {-DBL_MAX},
    {-FLT_MAX-1.0},
    {-FLT_MAX},
    {-1.111E+31},
    {-1.111E+30}, /* half prec */
    {-2.0}, {-1.0},
    {-DBL_MIN},
    {-FLT_MIN},
    {0.0},
    {FLT_MIN},
    {2.98023224e-08},
    {5.96046E-8}, /* min positive FP16 subnormal */
    {6.09756E-5}, /* max subnormal FP16 */
    {6.10352E-5}, /* min positive normal FP16 */
    {1.0},
    {1.0009765625}, /* smallest float after 1.0 FP16 */
    {DBL_MIN},
    {1.3789972848607228e-308},
    {1.4914738736681624e-308},
    {1.0}, {2.0},
    {M_E}, {M_PI},
    {65503.0},
    {65504.0}, /* max FP16 */
    {65505.0},
    {131007.0},
    {131008.0}, /* max AFP */
    {131009.0},
    {.h = 0x41dfffffffc00000 }, /* to int = 0x7fffffff */
    {FLT_MAX},
    {FLT_MAX + 1.0},
    {DBL_MAX},
    {INFINITY},
    {NAN},
    {.h = 0x7ff0000000000001}, /* SNAN */
    {SNAN},
};

/* Number of entries in f64_numbers */
static const int num_f64 = sizeof(f64_numbers) / sizeof(f64_numbers[0]);

/* Return how many double-precision test values are available. */
int get_num_f64(void)
{
    return num_f64;
}

/*
 * Return double-precision test value number i.
 *
 * The index wraps around the table in both directions, so any int is a
 * valid argument: get_f64(num) is the first entry again and get_f64(-1)
 * is the last one.
 */
double get_f64(int i)
{
    int idx = i % num_f64;

    /*
     * The result of % takes the sign of the dividend, so a negative i
     * would otherwise index in front of the table.
     */
    if (idx < 0) {
        idx += num_f64;
    }

    return f64_numbers[idx].d;
}
199
/*
 * Format a double-precision value as "f64(<hex float>:<raw bits>)".
 *
 * The first field is the value printed with %a, the second the underlying
 * 64-bit encoding in hex. The string is allocated by asprintf(), so the
 * caller owns it and releases it with free(). Returns NULL if asprintf()
 * fails.
 */
char *fmt_f64(double num)
{
    /*
     * Read the encoding through a union: dereferencing &num as a
     * uint64_t pointer would break the strict aliasing rules.
     */
    union {
        double d;
        uint64_t u;
    } bits = { .d = num };
    char *fmt;

    /* On failure asprintf() leaves fmt unspecified, so never hand it back */
    if (asprintf(&fmt, "f64(%02.20a:%#020" PRIx64 ")", num, bits.u) < 0) {
        return NULL;
    }

    return fmt;
}
207
208 /*
209 * Float flags
210 */
fmt_flags(void)211 char *fmt_flags(void)
212 {
213 int flags = fetestexcept(FE_ALL_EXCEPT);
214 char *fmt;
215
216 if (flags) {
217 asprintf(&fmt, "%s%s%s%s%s",
218 flags & FE_OVERFLOW ? "OVERFLOW " : "",
219 flags & FE_UNDERFLOW ? "UNDERFLOW " : "",
220 flags & FE_DIVBYZERO ? "DIV0 " : "",
221 flags & FE_INEXACT ? "INEXACT " : "",
222 flags & FE_INVALID ? "INVALID" : "");
223 } else {
224 asprintf(&fmt, "OK");
225 }
226
227 return fmt;
228 }
229