/*
 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "fpu/softfloat.h"
#include "cpu.h"
#include "fma_emu.h"
#include "arch.h"
#include "macros.h"

/* Single-precision (float32) layout constants used by the fixup helpers */
#define SF_BIAS        127    /* exponent bias */
#define SF_MAXEXP      254    /* largest biased exponent of a finite value */
#define SF_MANTBITS    23     /* number of stored mantissa bits */
/* NaN pattern (all bits set) written back by the NaN/invalid fixup paths */
#define float32_nan    make_float32(0xffffffff)

/*
 * These three tables are used by the cabacdecbin instruction
 * (rLPS_table_64x4, AC_next_state_MPS_64 and AC_next_state_LPS_64).
 *
 * rLPS_table_64x4: 64 rows of 4 byte-sized entries.
 */
const uint8_t rLPS_table_64x4[64][4] = {
    {128, 176, 208, 240},
    {128, 167, 197, 227},
    {128, 158, 187, 216},
    {123, 150, 178, 205},
    {116, 142, 169, 195},
    {111, 135, 160, 185},
    {105, 128, 152, 175},
    {100, 122, 144, 166},
    {95, 116, 137, 158},
    {90, 110, 130, 150},
    {85, 104, 123, 142},
    {81, 99, 117, 135},
    {77, 94, 111, 128},
    {73, 89, 105, 122},
    {69, 85, 100, 116},
    {66, 80, 95, 110},
    {62, 76, 90, 104},
    {59, 72, 86, 99},
    {56, 69, 81, 94},
    {53, 65, 77, 89},
    {51, 62, 73, 85},
    {48, 59, 69, 80},
    {46, 56, 66, 76},
    {43, 53, 63, 72},
    {41, 50, 59, 69},
    {39, 48, 56, 65},
    {37, 45, 54, 62},
    {35, 43, 51, 59},
    {33, 41, 48, 56},
    {32, 39, 46, 53},
    {30, 37, 43, 50},
    {29, 35, 41, 48},
    {27, 33, 39, 45},
    {26, 31, 37, 43},
    {24, 30, 35, 41},
    {23, 28, 33, 39},
    {22, 27, 32, 37},
    {21, 26, 30, 35},
    {20, 24, 29, 33},
    {19, 23, 27, 31},
    {18, 22, 26, 30},
    {17, 21, 25, 28},
    {16, 20, 23, 27},
    {15, 19, 22, 25},
    {14, 18, 21, 24},
    {14, 17, 20, 23},
    {13, 16, 19, 22},
    {12, 15, 18, 21},
    {12, 14, 17, 20},
    {11, 14, 16, 19},
    {11, 13, 15, 18},
    {10, 12, 15, 17},
    {10, 12, 14, 16},
    {9, 11, 13, 15},
    {9, 11, 12, 14},
    {8, 10, 12, 14},
    {8, 9, 11, 13},
    {7, 9, 11, 12},
    {7, 9, 10, 12},
    {7, 8, 10, 11},
    {6, 8, 9, 11},
    {6, 7, 9, 10},
    {6, 7, 8, 9},
    {2, 2, 2, 2}
};

/*
 * 64-entry MPS next-state table; one of the tables used by the
 * cabacdecbin instruction.  Entries 0..61 map i -> i + 1; the last two
 * entries map to themselves (62 -> 62, 63 -> 63).
 */
const uint8_t AC_next_state_MPS_64[64] = {
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
    51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    61, 62, 62, 63
};

/*
 * 64-entry LPS next-state table; one of the tables used by the
 * cabacdecbin instruction.
 */
const uint8_t AC_next_state_LPS_64[64] = {
    0, 0, 1, 2, 2, 4, 4, 5, 6, 7,
    8, 9, 9, 11, 11, 12, 13, 13, 15, 15,
    16, 16, 18, 18, 19, 19, 21, 21, 22, 22,
    23, 24, 24, 25, 26, 26, 27, 27, 28, 29,
    29, 30, 30, 30, 31, 32, 32, 33, 33, 33,
    34, 34, 35, 35, 35, 36, 36, 36, 37, 37,
    37, 38, 38, 63
};

/*
 * 64-bit masks used by interleave()/deinterleave() to spread out or
 * gather bits.  Each mask keeps the low half of every group of
 * 2, 4, 8, 16, 32 or 64 bits respectively.
 */
#define BITS_MASK_8 0x5555555555555555ULL
#define PAIR_MASK_8 0x3333333333333333ULL
#define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL
#define BYTE_MASK_8 0x00ff00ff00ff00ffULL
#define HALF_MASK_8 0x0000ffff0000ffffULL
#define WORD_MASK_8 0x00000000ffffffffULL

interleave(uint32_t odd,uint32_t even)128b2391681STaylor Simpson uint64_t interleave(uint32_t odd, uint32_t even)
129b2391681STaylor Simpson {
130b2391681STaylor Simpson /* Convert to long long */
131b2391681STaylor Simpson uint64_t myodd = odd;
132b2391681STaylor Simpson uint64_t myeven = even;
133b2391681STaylor Simpson /* First, spread bits out */
134b2391681STaylor Simpson myodd = (myodd | (myodd << 16)) & HALF_MASK_8;
135b2391681STaylor Simpson myeven = (myeven | (myeven << 16)) & HALF_MASK_8;
136b2391681STaylor Simpson myodd = (myodd | (myodd << 8)) & BYTE_MASK_8;
137b2391681STaylor Simpson myeven = (myeven | (myeven << 8)) & BYTE_MASK_8;
138b2391681STaylor Simpson myodd = (myodd | (myodd << 4)) & NYBL_MASK_8;
139b2391681STaylor Simpson myeven = (myeven | (myeven << 4)) & NYBL_MASK_8;
140b2391681STaylor Simpson myodd = (myodd | (myodd << 2)) & PAIR_MASK_8;
141b2391681STaylor Simpson myeven = (myeven | (myeven << 2)) & PAIR_MASK_8;
142b2391681STaylor Simpson myodd = (myodd | (myodd << 1)) & BITS_MASK_8;
143b2391681STaylor Simpson myeven = (myeven | (myeven << 1)) & BITS_MASK_8;
144b2391681STaylor Simpson /* Now OR together */
145b2391681STaylor Simpson return myeven | (myodd << 1);
146b2391681STaylor Simpson }
147b2391681STaylor Simpson
deinterleave(uint64_t src)148b2391681STaylor Simpson uint64_t deinterleave(uint64_t src)
149b2391681STaylor Simpson {
150b2391681STaylor Simpson /* Get odd and even bits */
151b2391681STaylor Simpson uint64_t myodd = ((src >> 1) & BITS_MASK_8);
152b2391681STaylor Simpson uint64_t myeven = (src & BITS_MASK_8);
153b2391681STaylor Simpson
154b2391681STaylor Simpson /* Unspread bits */
155b2391681STaylor Simpson myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8;
156b2391681STaylor Simpson myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8;
157b2391681STaylor Simpson myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8;
158b2391681STaylor Simpson myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8;
159b2391681STaylor Simpson myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8;
160b2391681STaylor Simpson myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8;
161b2391681STaylor Simpson myeven = (myeven | (myeven >> 8)) & HALF_MASK_8;
162b2391681STaylor Simpson myodd = (myodd | (myodd >> 8)) & HALF_MASK_8;
163b2391681STaylor Simpson myeven = (myeven | (myeven >> 16)) & WORD_MASK_8;
164b2391681STaylor Simpson myodd = (myodd | (myodd >> 16)) & WORD_MASK_8;
165b2391681STaylor Simpson
166b2391681STaylor Simpson /* Return odd bits in upper half */
167b2391681STaylor Simpson return myeven | (myodd << 32);
168b2391681STaylor Simpson }
169b2391681STaylor Simpson
/*
 * Convergent rounding: compute a / 2^n rounded to nearest, with exact
 * ties (fraction == 0.5) rounded to the even integer.
 *
 * @a: value to round
 * @n: number of fractional bits to drop; must be in the range 0..31
 *
 * Returns the rounded quotient.  For n == 0 the value is returned
 * unchanged.
 *
 * The bit masks are computed in uint32_t so that n == 31 does not
 * left-shift into the sign bit of a signed int (undefined in ISO C).
 * The final right shift of a possibly negative int64_t relies on the
 * compiler implementing it as an arithmetic shift.
 */
int32_t conv_round(int32_t a, int n)
{
    int64_t val;

    if (n == 0) {
        val = a;
    } else {
        const uint32_t ua = (uint32_t)a;
        const uint32_t round_bit = UINT32_C(1) << (n - 1);

        if ((ua & (round_bit - 1)) == 0) {
            /*
             * Bits N-2..0 are all zero, so this is either exact or a tie.
             * Add the LSB of the integer part (bit N) at the rounding
             * position: an odd integer part on a tie rounds up to even,
             * and in every other case the sum does not change the result.
             */
            val = (int64_t)a + (int64_t)((ua & (UINT32_C(1) << n)) >> 1);
        } else {
            /* Not a tie: ordinary round-half-up */
            val = (int64_t)a + (int64_t)round_bit;
        }
    }

    val = val >> n;
    return (int32_t)val;
}

/* Floating-point helpers */

1898c367524STaylor Simpson static const FloatRoundMode softfloat_roundingmodes[] = {
190b2391681STaylor Simpson float_round_nearest_even,
191b2391681STaylor Simpson float_round_to_zero,
192b2391681STaylor Simpson float_round_down,
193b2391681STaylor Simpson float_round_up,
194b2391681STaylor Simpson };
195b2391681STaylor Simpson
/*
 * Prepare env->fp_status for a floating-point operation: clear the
 * accumulated softfloat exception flags and select the rounding mode
 * from the USR_FPRND field of USR.
 */
void arch_fpop_start(CPUHexagonState *env)
{
    set_float_exception_flags(0, &env->fp_status);
    set_float_rounding_mode(
        softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)],
        &env->fp_status);
}

#ifdef CONFIG_USER_ONLY
/*
 * Hexagon Linux kernel only sets the relevant bits in USR (user status
 * register).  The exception isn't raised to user mode, so we don't
 * model it in qemu user mode.
 */
#define RAISE_FP_EXCEPTION   do {} while (0)
#endif

/*
 * If softfloat flag FLAG is set in the local variable 'flags' and the
 * sticky USR field USR_<MYF> is still clear, set USR_<MYF>; then, if the
 * enable field USR_<MYE> is set, invoke RAISE_FP_EXCEPTION.
 *
 * Expects an 'int flags' variable in the expanding scope.
 */
#define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \
    do { \
        if (flags & (FLAG)) { \
            if (GET_USR_FIELD(USR_##MYF) == 0) { \
                SET_USR_FIELD(USR_##MYF, 1); \
                if (GET_USR_FIELD(USR_##MYE)) { \
                    RAISE_FP_EXCEPTION; \
                } \
            } \
        } \
    } while (0)

/*
 * Finish a floating-point operation: transfer the softfloat exception
 * flags accumulated in env->fp_status into the corresponding sticky
 * fields of USR, invoking RAISE_FP_EXCEPTION for a newly set flag whose
 * enable field is set (see SOFTFLOAT_TEST_FLAG).
 */
void arch_fpop_end(CPUHexagonState *env)
{
    /*
     * NOTE(review): not referenced by name in this function; presumably
     * consumed by the USR field macros expanded below - confirm against
     * macros.h before removing.
     */
    const bool pkt_need_commit = true;
    int flags = get_float_exception_flags(&env->fp_status);
    if (flags != 0) {
        SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);
        SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE);
        SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE);
        SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE);
        SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE);
    }
}

/*
 * Common operand fixup for the single-precision reciprocal/divide helpers.
 *
 * @Rs:        in/out numerator
 * @Rt:        in/out denominator
 * @Rd:        out; float32_nan or float32_one for the special cases,
 *             zero when the basic checks passed
 * @adjust:    out; 0x80 when the quotient is near underflow, 0x40 when it
 *             is near overflow, 0 otherwise
 * @fp_status: softfloat status; invalid / divide-by-zero are raised here
 *
 * Special cases (NaN, Inf/Inf, 0/0, x/0, x/Inf, 0/x, Inf/x) are resolved
 * by overwriting the operands with fixed-up values.  Otherwise the
 * operands may be rescaled by a power of two to keep them away from the
 * exponent range limits.
 *
 * Returns 1 when the basic checks passed, 0 when a special case was
 * handled.
 */
int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
                         float_status *fp_status)
{
    int ret = 0;
    float32 RsV, RtV, RdV;
    int PeV = 0;

    RsV = *Rs;
    RtV = *Rt;
    if (float32_is_any_nan(RsV) && float32_is_any_nan(RtV)) {
        /* Bit 22 (top mantissa bit) clear in either NaN: raise invalid */
        if (extract32(RsV & RtV, 22, 1) == 0) {
            float_raise(float_flag_invalid, fp_status);
        }
        RdV = RsV = RtV = float32_nan;
    } else if (float32_is_any_nan(RsV)) {
        if (extract32(RsV, 22, 1) == 0) {
            float_raise(float_flag_invalid, fp_status);
        }
        RdV = RsV = RtV = float32_nan;
    } else if (float32_is_any_nan(RtV)) {
        /* or put NaN in num/den fixup? */
        if (extract32(RtV, 22, 1) == 0) {
            float_raise(float_flag_invalid, fp_status);
        }
        RdV = RsV = RtV = float32_nan;
    } else if (float32_is_infinity(RsV) && float32_is_infinity(RtV)) {
        /* or put Inf in num fixup? */
        RdV = RsV = RtV = float32_nan;
        float_raise(float_flag_invalid, fp_status);
    } else if (float32_is_zero(RsV) && float32_is_zero(RtV)) {
        /* or put zero in num fixup? */
        RdV = RsV = RtV = float32_nan;
        float_raise(float_flag_invalid, fp_status);
    } else if (float32_is_zero(RtV)) {
        /* or put Inf in num fixup? */
        uint8_t RsV_sign = float32_is_neg(RsV);
        uint8_t RtV_sign = float32_is_neg(RtV);
        /* Check that RsV is NOT infinite before we overwrite it */
        if (!float32_is_infinity(RsV)) {
            float_raise(float_flag_divbyzero, fp_status);
        }
        RsV = infinite_float32(RsV_sign ^ RtV_sign);
        RtV = float32_one;
        RdV = float32_one;
    } else if (float32_is_infinity(RtV)) {
        /* Finite / Inf: numerator becomes a zero carrying the XOR of signs */
        RsV = make_float32(0x80000000 & (RsV ^ RtV));
        RtV = float32_one;
        RdV = float32_one;
    } else if (float32_is_zero(RsV)) {
        /* Does this just work itself out? */
        /* No, 0/Inf causes problems. */
        RsV = make_float32(0x80000000 & (RsV ^ RtV));
        RtV = float32_one;
        RdV = float32_one;
    } else if (float32_is_infinity(RsV)) {
        uint8_t RsV_sign = float32_is_neg(RsV);
        uint8_t RtV_sign = float32_is_neg(RtV);
        RsV = infinite_float32(RsV_sign ^ RtV_sign);
        RtV = float32_one;
        RdV = float32_one;
    } else {
        /* Basic checks passed; PeV is still 0 unless set below */
        int n_exp = float32_getexp_raw(RsV);
        int d_exp = float32_getexp_raw(RtV);
        /* Biased exponent estimate of the quotient */
        int q_exp = n_exp - d_exp + SF_BIAS;

        if (q_exp <= SF_MANTBITS) {
            /* Near quotient underflow / inexact Q */
            PeV = 0x80;
            RtV = float32_scalbn(RtV, -64, fp_status);
            RsV = float32_scalbn(RsV, 64, fp_status);
        } else if (q_exp > (SF_MAXEXP - 24)) {
            /* Near quotient overflow */
            PeV = 0x40;
            RtV = float32_scalbn(RtV, 32, fp_status);
            RsV = float32_scalbn(RsV, -32, fp_status);
        } else if (n_exp <= SF_MANTBITS + 2) {
            /* Small numerator exponent: scale both operands up */
            RtV = float32_scalbn(RtV, 64, fp_status);
            RsV = float32_scalbn(RsV, 64, fp_status);
        } else if (d_exp <= 1) {
            /* Very small denominator exponent: scale both operands up */
            RtV = float32_scalbn(RtV, 32, fp_status);
            RsV = float32_scalbn(RsV, 32, fp_status);
        } else if (d_exp > 252) {
            /* Very large denominator exponent: scale both operands down */
            RtV = float32_scalbn(RtV, -32, fp_status);
            RsV = float32_scalbn(RsV, -32, fp_status);
        }
        RdV = float32_zero;
        ret = 1;
    }
    *Rs = RsV;
    *Rt = RtV;
    *Rd = RdV;
    *adjust = PeV;
    return ret;
}

/*
 * Common operand fixup for the single-precision inverse square root
 * helper.
 *
 * @Rs:        in/out operand
 * @Rd:        out; float32_nan, an infinity or float32_one for the special
 *             cases, zero when the basic checks passed
 * @adjust:    out; 0xe0 when the operand was scaled up by 2^64, else 0
 * @fp_status: softfloat status; invalid is raised here where applicable
 *
 * Returns 1 when the basic checks passed, 0 when a special case (NaN,
 * negative, infinity, zero) was handled.
 */
int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
                           float_status *fp_status)
{
    float32 RsV, RdV;
    int PeV = 0;
    int ret = 0;

    RsV = *Rs;
    if (float32_is_any_nan(RsV)) {
        /* Bit 22 (top mantissa bit) clear: raise invalid */
        if (extract32(RsV, 22, 1) == 0) {
            float_raise(float_flag_invalid, fp_status);
        }
        RdV = RsV = float32_nan;
    } else if (float32_lt(RsV, float32_zero, fp_status)) {
        /* Negative nonzero values are NaN */
        float_raise(float_flag_invalid, fp_status);
        RsV = float32_nan;
        RdV = float32_nan;
    } else if (float32_is_infinity(RsV)) {
        /*
         * or put Inf in num fixup?
         * NOTE(review): infinite_float32() is called with a sign argument
         * elsewhere in this file, so 1 presumably selects the negative
         * infinity - confirm this is intended.
         */
        RsV = infinite_float32(1);
        RdV = infinite_float32(1);
    } else if (float32_is_zero(RsV)) {
        /* or put zero in num fixup? */
        RdV = float32_one;
    } else {
        /* Basic checks passed; PeV is still 0 unless set below */
        int r_exp = float32_getexp(RsV);

        if (r_exp <= 24) {
            /* Small exponent: scale the operand up and flag the adjustment */
            RsV = float32_scalbn(RsV, 64, fp_status);
            PeV = 0xe0;
        }
        RdV = float32_zero;
        ret = 1;
    }
    *Rs = RsV;
    *Rd = RdV;
    *adjust = PeV;
    return ret;
}

/* 128-entry byte lookup table for the reciprocal approximation */
const uint8_t recip_lookup_table[128] = {
    0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4,
    0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9,
    0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1,
    0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b,
    0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087,
    0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075,
    0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065,
    0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056,
    0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049,
    0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c,
    0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030,
    0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025,
    0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b,
    0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012,
    0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
    0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000,
};

/* 128-entry byte lookup table for the inverse square root approximation */
const uint8_t invsqrt_lookup_table[128] = {
    0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057,
    0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045,
    0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036,
    0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028,
    0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d,
    0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012,
    0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
    0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001,
    0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4,
    0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb,
    0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6,
    0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3,
    0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093,
    0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084,
    0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077,
    0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b,
};
413