xref: /openbmc/qemu/target/hexagon/arch.c (revision cdd684b8)
1 /*
2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "fpu/softfloat.h"
20 #include "cpu.h"
21 #include "fma_emu.h"
22 #include "arch.h"
23 #include "macros.h"
24 
/*
 * Single-precision constants used by the reciprocal fix-up code in this file.
 */
/* Bias added when the difference of two raw exponents is re-biased */
#define SF_BIAS 127
/* Upper raw-exponent bound used for the "near overflow" check */
#define SF_MAXEXP 254
/* Mantissa width in bits; used as the "near underflow" threshold */
#define SF_MANTBITS 23
/* All-ones bit pattern handed back whenever a helper produces NaN */
#define float32_nan make_float32(0xffffffff)
29 
/*
 * First of three lookup tables (together with AC_next_state_MPS_64 and
 * AC_next_state_LPS_64) used by the cabacdecbin instruction.
 *
 * 64 rows of 4 entries each.  NOTE(review): presumably indexed as
 * [state][quantized range], like the H.264 CABAC rangeTabLPS table --
 * confirm against the cabacdecbin implementation.
 */
const uint8_t rLPS_table_64x4[64][4] = {
    {128, 176, 208, 240}, {128, 167, 197, 227},    /*  0,  1 */
    {128, 158, 187, 216}, {123, 150, 178, 205},    /*  2,  3 */
    {116, 142, 169, 195}, {111, 135, 160, 185},    /*  4,  5 */
    {105, 128, 152, 175}, {100, 122, 144, 166},    /*  6,  7 */
    { 95, 116, 137, 158}, { 90, 110, 130, 150},    /*  8,  9 */
    { 85, 104, 123, 142}, { 81,  99, 117, 135},    /* 10, 11 */
    { 77,  94, 111, 128}, { 73,  89, 105, 122},    /* 12, 13 */
    { 69,  85, 100, 116}, { 66,  80,  95, 110},    /* 14, 15 */
    { 62,  76,  90, 104}, { 59,  72,  86,  99},    /* 16, 17 */
    { 56,  69,  81,  94}, { 53,  65,  77,  89},    /* 18, 19 */
    { 51,  62,  73,  85}, { 48,  59,  69,  80},    /* 20, 21 */
    { 46,  56,  66,  76}, { 43,  53,  63,  72},    /* 22, 23 */
    { 41,  50,  59,  69}, { 39,  48,  56,  65},    /* 24, 25 */
    { 37,  45,  54,  62}, { 35,  43,  51,  59},    /* 26, 27 */
    { 33,  41,  48,  56}, { 32,  39,  46,  53},    /* 28, 29 */
    { 30,  37,  43,  50}, { 29,  35,  41,  48},    /* 30, 31 */
    { 27,  33,  39,  45}, { 26,  31,  37,  43},    /* 32, 33 */
    { 24,  30,  35,  41}, { 23,  28,  33,  39},    /* 34, 35 */
    { 22,  27,  32,  37}, { 21,  26,  30,  35},    /* 36, 37 */
    { 20,  24,  29,  33}, { 19,  23,  27,  31},    /* 38, 39 */
    { 18,  22,  26,  30}, { 17,  21,  25,  28},    /* 40, 41 */
    { 16,  20,  23,  27}, { 15,  19,  22,  25},    /* 42, 43 */
    { 14,  18,  21,  24}, { 14,  17,  20,  23},    /* 44, 45 */
    { 13,  16,  19,  22}, { 12,  15,  18,  21},    /* 46, 47 */
    { 12,  14,  17,  20}, { 11,  14,  16,  19},    /* 48, 49 */
    { 11,  13,  15,  18}, { 10,  12,  15,  17},    /* 50, 51 */
    { 10,  12,  14,  16}, {  9,  11,  13,  15},    /* 52, 53 */
    {  9,  11,  12,  14}, {  8,  10,  12,  14},    /* 54, 55 */
    {  8,   9,  11,  13}, {  7,   9,  11,  12},    /* 56, 57 */
    {  7,   9,  10,  12}, {  7,   8,  10,  11},    /* 58, 59 */
    {  6,   8,   9,  11}, {  6,   7,   9,  10},    /* 60, 61 */
    {  6,   7,   8,   9}, {  2,   2,   2,   2},    /* 62, 63 */
};
99 
/*
 * 64-entry next-state table.  Entry i is i + 1 for i = 0..61; entries 62
 * and 63 map to themselves.
 * NOTE(review): presumably the CABAC state transition taken after decoding
 * the most probable symbol -- confirm against the cabacdecbin helper.
 */
const uint8_t AC_next_state_MPS_64[64] = {
     1,  2,  3,  4,  5,  6,  7,  8,    /*  0 ..  7 */
     9, 10, 11, 12, 13, 14, 15, 16,    /*  8 .. 15 */
    17, 18, 19, 20, 21, 22, 23, 24,    /* 16 .. 23 */
    25, 26, 27, 28, 29, 30, 31, 32,    /* 24 .. 31 */
    33, 34, 35, 36, 37, 38, 39, 40,    /* 32 .. 39 */
    41, 42, 43, 44, 45, 46, 47, 48,    /* 40 .. 47 */
    49, 50, 51, 52, 53, 54, 55, 56,    /* 48 .. 55 */
    57, 58, 59, 60, 61, 62, 62, 63,    /* 56 .. 63 */
};
109 
110 
/*
 * 64-entry next-state table; every entry is <= its own index.
 * NOTE(review): presumably the CABAC state transition taken after decoding
 * the least probable symbol -- confirm against the cabacdecbin helper.
 */
const uint8_t AC_next_state_LPS_64[64] = {
     0,  0,  1,  2,  2,  4,  4,  5,    /*  0 ..  7 */
     6,  7,  8,  9,  9, 11, 11, 12,    /*  8 .. 15 */
    13, 13, 15, 15, 16, 16, 18, 18,    /* 16 .. 23 */
    19, 19, 21, 21, 22, 22, 23, 24,    /* 24 .. 31 */
    24, 25, 26, 26, 27, 27, 28, 29,    /* 32 .. 39 */
    29, 30, 30, 30, 31, 32, 32, 33,    /* 40 .. 47 */
    33, 33, 34, 34, 35, 35, 35, 36,    /* 48 .. 55 */
    36, 36, 37, 37, 37, 38, 38, 63,    /* 56 .. 63 */
};
120 
/*
 * Masks for the log-step bit spreading/gathering below.  Each one keeps the
 * low half of every group of 2, 4, 8, 16, 32 or 64 bits respectively.
 */
#define BITS_MASK_8 0x5555555555555555ULL
#define PAIR_MASK_8 0x3333333333333333ULL
#define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL
#define BYTE_MASK_8 0x00ff00ff00ff00ffULL
#define HALF_MASK_8 0x0000ffff0000ffffULL
#define WORD_MASK_8 0x00000000ffffffffULL

/* Move bit i of @bits to bit 2*i of the result; odd result bits are zero. */
static uint64_t spread_to_even_bits(uint32_t bits)
{
    uint64_t v = bits;

    v = (v | (v << 16)) & HALF_MASK_8;
    v = (v | (v << 8)) & BYTE_MASK_8;
    v = (v | (v << 4)) & NYBL_MASK_8;
    v = (v | (v << 2)) & PAIR_MASK_8;
    v = (v | (v << 1)) & BITS_MASK_8;
    return v;
}

/*
 * Inverse of spread_to_even_bits(): pack the even-position bits of @v
 * (odd positions must already be zero) into the low 32 bits.
 */
static uint64_t gather_even_bits(uint64_t v)
{
    v = (v | (v >> 1)) & PAIR_MASK_8;
    v = (v | (v >> 2)) & NYBL_MASK_8;
    v = (v | (v >> 4)) & BYTE_MASK_8;
    v = (v | (v >> 8)) & HALF_MASK_8;
    v = (v | (v >> 16)) & WORD_MASK_8;
    return v;
}

/*
 * Interleave two 32-bit words bit by bit: bit i of @even lands in result
 * bit 2*i and bit i of @odd lands in result bit 2*i + 1.
 */
uint64_t interleave(uint32_t odd, uint32_t even)
{
    return spread_to_even_bits(even) | (spread_to_even_bits(odd) << 1);
}

/*
 * Undo interleave(): the even-numbered bits of @src are packed into the
 * lower 32 bits of the result and the odd-numbered bits into the upper 32.
 */
uint64_t deinterleave(uint64_t src)
{
    uint64_t even_half = gather_even_bits(src & BITS_MASK_8);
    uint64_t odd_half = gather_even_bits((src >> 1) & BITS_MASK_8);

    return even_half | (odd_half << 32);
}
169 
/*
 * Shift @a right by @n bits with convergent rounding.
 *
 * The value is widened to 64 bits, a rounding increment is added and the
 * sum is shifted right by @n:
 *   - n == 0: no rounding, @a is returned unchanged;
 *   - bits n-2..0 of @a all zero: the increment is bit n of @a moved down
 *     to bit n-1, so an exact half rounds to an even result;
 *   - otherwise: the increment is 1 << (n - 1), i.e. one half.
 *
 * @n is expected to be in the range 0..31.  All masks are built in unsigned
 * arithmetic so that n == 31 does not shift a 1 into the sign bit of a
 * signed int, which ISO C leaves undefined.
 *
 * Returns the rounded result truncated to 32 bits.
 */
int32_t conv_round(int32_t a, int n)
{
    /* Widen (sign-extending) so that adding the increment cannot overflow */
    int64_t val = a;

    if (n != 0) {
        const uint32_t half = (uint32_t)1 << (n - 1);

        if (((uint32_t)a & (half - 1)) == 0) {    /* N-2..0 all zero? */
            /* Add LSB from int part, moved down to the "half" position */
            val += (int64_t)(((uint32_t)a & (half << 1)) >> 1);
        } else {
            val += (int64_t)half;
        }
    }

    return (int32_t)(val >> n);
}
186 
187 /* Floating Point Stuff */
188 
189 static const FloatRoundMode softfloat_roundingmodes[] = {
190     float_round_nearest_even,
191     float_round_to_zero,
192     float_round_down,
193     float_round_up,
194 };
195 
196 void arch_fpop_start(CPUHexagonState *env)
197 {
198     set_float_exception_flags(0, &env->fp_status);
199     set_float_rounding_mode(
200         softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)],
201         &env->fp_status);
202 }
203 
#ifdef CONFIG_USER_ONLY
/*
 * The Hexagon Linux kernel only records floating point exceptions in the
 * relevant bits of USR (user status register); nothing is delivered to
 * user mode.  qemu user mode therefore does not model the exception and
 * this expands to an empty statement.
 */
#define RAISE_FP_EXCEPTION   do { } while (0)
#endif
212 
/*
 * Helper for arch_fpop_end(); expects a local variable named 'flags' that
 * holds the softfloat exception flags.
 *
 * If softfloat flag FLAG is present and USR field USR_<MYF> is still zero,
 * set that field to 1 and, when the matching enable field USR_<MYE> is
 * non-zero, invoke RAISE_FP_EXCEPTION.
 */
#define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \
    do { \
        if ((flags & (FLAG)) && GET_USR_FIELD(USR_##MYF) == 0) { \
            SET_USR_FIELD(USR_##MYF, 1); \
            if (GET_USR_FIELD(USR_##MYE)) { \
                RAISE_FP_EXCEPTION; \
            } \
        } \
    } while (0)
224 
225 void arch_fpop_end(CPUHexagonState *env)
226 {
227     const bool pkt_need_commit = true;
228     int flags = get_float_exception_flags(&env->fp_status);
229     if (flags != 0) {
230         SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);
231         SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE);
232         SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE);
233         SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE);
234         SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE);
235     }
236 }
237 
238 int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
239                          float_status *fp_status)
240 {
241     int n_exp;
242     int d_exp;
243     int ret = 0;
244     float32 RsV, RtV, RdV;
245     int PeV = 0;
246     RsV = *Rs;
247     RtV = *Rt;
248     if (float32_is_any_nan(RsV) && float32_is_any_nan(RtV)) {
249         if (extract32(RsV & RtV, 22, 1) == 0) {
250             float_raise(float_flag_invalid, fp_status);
251         }
252         RdV = RsV = RtV = float32_nan;
253     } else if (float32_is_any_nan(RsV)) {
254         if (extract32(RsV, 22, 1) == 0) {
255             float_raise(float_flag_invalid, fp_status);
256         }
257         RdV = RsV = RtV = float32_nan;
258     } else if (float32_is_any_nan(RtV)) {
259         /* or put NaN in num/den fixup? */
260         if (extract32(RtV, 22, 1) == 0) {
261             float_raise(float_flag_invalid, fp_status);
262         }
263         RdV = RsV = RtV = float32_nan;
264     } else if (float32_is_infinity(RsV) && float32_is_infinity(RtV)) {
265         /* or put Inf in num fixup? */
266         RdV = RsV = RtV = float32_nan;
267         float_raise(float_flag_invalid, fp_status);
268     } else if (float32_is_zero(RsV) && float32_is_zero(RtV)) {
269         /* or put zero in num fixup? */
270         RdV = RsV = RtV = float32_nan;
271         float_raise(float_flag_invalid, fp_status);
272     } else if (float32_is_zero(RtV)) {
273         /* or put Inf in num fixup? */
274         uint8_t RsV_sign = float32_is_neg(RsV);
275         uint8_t RtV_sign = float32_is_neg(RtV);
276         /* Check that RsV is NOT infinite before we overwrite it */
277         if (!float32_is_infinity(RsV)) {
278             float_raise(float_flag_divbyzero, fp_status);
279         }
280         RsV = infinite_float32(RsV_sign ^ RtV_sign);
281         RtV = float32_one;
282         RdV = float32_one;
283     } else if (float32_is_infinity(RtV)) {
284         RsV = make_float32(0x80000000 & (RsV ^ RtV));
285         RtV = float32_one;
286         RdV = float32_one;
287     } else if (float32_is_zero(RsV)) {
288         /* Does this just work itself out? */
289         /* No, 0/Inf causes problems. */
290         RsV = make_float32(0x80000000 & (RsV ^ RtV));
291         RtV = float32_one;
292         RdV = float32_one;
293     } else if (float32_is_infinity(RsV)) {
294         uint8_t RsV_sign = float32_is_neg(RsV);
295         uint8_t RtV_sign = float32_is_neg(RtV);
296         RsV = infinite_float32(RsV_sign ^ RtV_sign);
297         RtV = float32_one;
298         RdV = float32_one;
299     } else {
300         PeV = 0x00;
301         /* Basic checks passed */
302         n_exp = float32_getexp_raw(RsV);
303         d_exp = float32_getexp_raw(RtV);
304         if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) {
305             /* Near quotient underflow / inexact Q */
306             PeV = 0x80;
307             RtV = float32_scalbn(RtV, -64, fp_status);
308             RsV = float32_scalbn(RsV, 64, fp_status);
309         } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) {
310             /* Near quotient overflow */
311             PeV = 0x40;
312             RtV = float32_scalbn(RtV, 32, fp_status);
313             RsV = float32_scalbn(RsV, -32, fp_status);
314         } else if (n_exp <= SF_MANTBITS + 2) {
315             RtV = float32_scalbn(RtV, 64, fp_status);
316             RsV = float32_scalbn(RsV, 64, fp_status);
317         } else if (d_exp <= 1) {
318             RtV = float32_scalbn(RtV, 32, fp_status);
319             RsV = float32_scalbn(RsV, 32, fp_status);
320         } else if (d_exp > 252) {
321             RtV = float32_scalbn(RtV, -32, fp_status);
322             RsV = float32_scalbn(RsV, -32, fp_status);
323         }
324         RdV = 0;
325         ret = 1;
326     }
327     *Rs = RsV;
328     *Rt = RtV;
329     *Rd = RdV;
330     *adjust = PeV;
331     return ret;
332 }
333 
334 int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
335                            float_status *fp_status)
336 {
337     float32 RsV, RdV;
338     int PeV = 0;
339     int r_exp;
340     int ret = 0;
341     RsV = *Rs;
342     if (float32_is_any_nan(RsV)) {
343         if (extract32(RsV, 22, 1) == 0) {
344             float_raise(float_flag_invalid, fp_status);
345         }
346         RdV = RsV = float32_nan;
347     } else if (float32_lt(RsV, float32_zero, fp_status)) {
348         /* Negative nonzero values are NaN */
349         float_raise(float_flag_invalid, fp_status);
350         RsV = float32_nan;
351         RdV = float32_nan;
352     } else if (float32_is_infinity(RsV)) {
353         /* or put Inf in num fixup? */
354         RsV = infinite_float32(1);
355         RdV = infinite_float32(1);
356     } else if (float32_is_zero(RsV)) {
357         /* or put zero in num fixup? */
358         RdV = float32_one;
359     } else {
360         PeV = 0x00;
361         /* Basic checks passed */
362         r_exp = float32_getexp(RsV);
363         if (r_exp <= 24) {
364             RsV = float32_scalbn(RsV, 64, fp_status);
365             PeV = 0xe0;
366         }
367         RdV = 0;
368         ret = 1;
369     }
370     *Rs = RsV;
371     *Rd = RdV;
372     *adjust = PeV;
373     return ret;
374 }
375 
/*
 * 128-entry byte table, strictly decreasing from 0xfe down to 0x00.
 * NOTE(review): presumably the mantissa seed for the reciprocal
 * approximation instruction -- confirm against the code that indexes it.
 */
const uint8_t recip_lookup_table[128] = {
    0xfe, 0xfa, 0xf6, 0xf2, 0xef, 0xeb, 0xe7, 0xe4,    /*   0 ..   7 */
    0xe0, 0xdd, 0xd9, 0xd6, 0xd2, 0xcf, 0xcc, 0xc9,    /*   8 ..  15 */
    0xc6, 0xc2, 0xbf, 0xbc, 0xb9, 0xb6, 0xb3, 0xb1,    /*  16 ..  23 */
    0xae, 0xab, 0xa8, 0xa5, 0xa3, 0xa0, 0x9d, 0x9b,    /*  24 ..  31 */
    0x98, 0x96, 0x93, 0x91, 0x8e, 0x8c, 0x8a, 0x87,    /*  32 ..  39 */
    0x85, 0x83, 0x80, 0x7e, 0x7c, 0x7a, 0x78, 0x75,    /*  40 ..  47 */
    0x73, 0x71, 0x6f, 0x6d, 0x6b, 0x69, 0x67, 0x65,    /*  48 ..  55 */
    0x63, 0x61, 0x5f, 0x5e, 0x5c, 0x5a, 0x58, 0x56,    /*  56 ..  63 */
    0x54, 0x53, 0x51, 0x4f, 0x4e, 0x4c, 0x4a, 0x49,    /*  64 ..  71 */
    0x47, 0x45, 0x44, 0x42, 0x40, 0x3f, 0x3d, 0x3c,    /*  72 ..  79 */
    0x3a, 0x39, 0x37, 0x36, 0x34, 0x33, 0x32, 0x30,    /*  80 ..  87 */
    0x2f, 0x2d, 0x2c, 0x2b, 0x29, 0x28, 0x27, 0x25,    /*  88 ..  95 */
    0x24, 0x23, 0x21, 0x20, 0x1f, 0x1e, 0x1c, 0x1b,    /*  96 .. 103 */
    0x1a, 0x19, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12,    /* 104 .. 111 */
    0x11, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09,    /* 112 .. 119 */
    0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x00,    /* 120 .. 127 */
};
394 
/*
 * 128-entry byte table made of two strictly decreasing halves:
 * entries 0..63 run from 0x69 down to 0x01, entries 64..127 from 0xfe
 * down to 0x6b.
 * NOTE(review): presumably the mantissa seed for the inverse square root
 * approximation instruction -- confirm against the code that indexes it.
 */
const uint8_t invsqrt_lookup_table[128] = {
    0x69, 0x66, 0x63, 0x61, 0x5e, 0x5b, 0x59, 0x57,    /*   0 ..   7 */
    0x54, 0x52, 0x50, 0x4d, 0x4b, 0x49, 0x47, 0x45,    /*   8 ..  15 */
    0x43, 0x41, 0x3f, 0x3d, 0x3b, 0x39, 0x37, 0x36,    /*  16 ..  23 */
    0x34, 0x32, 0x30, 0x2f, 0x2d, 0x2c, 0x2a, 0x28,    /*  24 ..  31 */
    0x27, 0x25, 0x24, 0x22, 0x21, 0x1f, 0x1e, 0x1d,    /*  32 ..  39 */
    0x1b, 0x1a, 0x19, 0x17, 0x16, 0x15, 0x14, 0x12,    /*  40 ..  47 */
    0x11, 0x10, 0x0f, 0x0d, 0x0c, 0x0b, 0x0a, 0x09,    /*  48 ..  55 */
    0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,    /*  56 ..  63 */
    0xfe, 0xfa, 0xf6, 0xf3, 0xef, 0xeb, 0xe8, 0xe4,    /*  64 ..  71 */
    0xe1, 0xde, 0xdb, 0xd7, 0xd4, 0xd1, 0xce, 0xcb,    /*  72 ..  79 */
    0xc9, 0xc6, 0xc3, 0xc0, 0xbe, 0xbb, 0xb8, 0xb6,    /*  80 ..  87 */
    0xb3, 0xb1, 0xaf, 0xac, 0xaa, 0xa8, 0xa5, 0xa3,    /*  88 ..  95 */
    0xa1, 0x9f, 0x9d, 0x9b, 0x99, 0x97, 0x95, 0x93,    /*  96 .. 103 */
    0x91, 0x8f, 0x8d, 0x8b, 0x89, 0x87, 0x86, 0x84,    /* 104 .. 111 */
    0x82, 0x80, 0x7f, 0x7d, 0x7b, 0x7a, 0x78, 0x77,    /* 112 .. 119 */
    0x75, 0x74, 0x72, 0x71, 0x6f, 0x6e, 0x6c, 0x6b,    /* 120 .. 127 */
};
413