xref: /openbmc/qemu/target/hexagon/arch.c (revision 449d6d9eb44772e69f11d002e3c1e2be8a91c350)
1  /*
2   *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3   *
4   *  This program is free software; you can redistribute it and/or modify
5   *  it under the terms of the GNU General Public License as published by
6   *  the Free Software Foundation; either version 2 of the License, or
7   *  (at your option) any later version.
8   *
9   *  This program is distributed in the hope that it will be useful,
10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   *  GNU General Public License for more details.
13   *
14   *  You should have received a copy of the GNU General Public License
15   *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16   */
17  
18  #include "qemu/osdep.h"
19  #include "fpu/softfloat.h"
20  #include "cpu.h"
21  #include "fma_emu.h"
22  #include "arch.h"
23  #include "macros.h"
24  
/* Single-precision (float32) layout constants used by the helpers below */
#define SF_BIAS        127   /* exponent bias */
#define SF_MAXEXP      254   /* largest biased exponent of a finite value */
#define SF_MANTBITS    23    /* number of explicit fraction bits */
/* All-ones bit pattern; the NaN value stored by the fixup paths below */
#define float32_nan    make_float32(0xffffffff)
29  
30  /*
31   * These three tables are used by the cabacdecbin instruction
32   */
/*
 * 64 rows of 4 byte-sized entries, used by the cabacdecbin instruction.
 * NOTE(review): the values look like the H.264 CABAC LPS range table
 * (row = probability state, column = quantized range index) -- confirm
 * the indexing against the instruction helper, which is not in this file.
 */
const uint8_t rLPS_table_64x4[64][4] = {
    {128, 176, 208, 240},
    {128, 167, 197, 227},
    {128, 158, 187, 216},
    {123, 150, 178, 205},
    {116, 142, 169, 195},
    {111, 135, 160, 185},
    {105, 128, 152, 175},
    {100, 122, 144, 166},
    {95, 116, 137, 158},
    {90, 110, 130, 150},
    {85, 104, 123, 142},
    {81, 99, 117, 135},
    {77, 94, 111, 128},
    {73, 89, 105, 122},
    {69, 85, 100, 116},
    {66, 80, 95, 110},
    {62, 76, 90, 104},
    {59, 72, 86, 99},
    {56, 69, 81, 94},
    {53, 65, 77, 89},
    {51, 62, 73, 85},
    {48, 59, 69, 80},
    {46, 56, 66, 76},
    {43, 53, 63, 72},
    {41, 50, 59, 69},
    {39, 48, 56, 65},
    {37, 45, 54, 62},
    {35, 43, 51, 59},
    {33, 41, 48, 56},
    {32, 39, 46, 53},
    {30, 37, 43, 50},
    {29, 35, 41, 48},
    {27, 33, 39, 45},
    {26, 31, 37, 43},
    {24, 30, 35, 41},
    {23, 28, 33, 39},
    {22, 27, 32, 37},
    {21, 26, 30, 35},
    {20, 24, 29, 33},
    {19, 23, 27, 31},
    {18, 22, 26, 30},
    {17, 21, 25, 28},
    {16, 20, 23, 27},
    {15, 19, 22, 25},
    {14, 18, 21, 24},
    {14, 17, 20, 23},
    {13, 16, 19, 22},
    {12, 15, 18, 21},
    {12, 14, 17, 20},
    {11, 14, 16, 19},
    {11, 13, 15, 18},
    {10, 12, 15, 17},
    {10, 12, 14, 16},
    {9, 11, 13, 15},
    {9, 11, 12, 14},
    {8, 10, 12, 14},
    {8, 9, 11, 13},
    {7, 9, 11, 12},
    {7, 9, 10, 12},
    {7, 8, 10, 11},
    {6, 8, 9, 11},
    {6, 7, 9, 10},
    {6, 7, 8, 9},
    {2, 2, 2, 2}
};
99  
/*
 * 64-entry state transition table, used by the cabacdecbin instruction.
 * Entry i holds i + 1 for i <= 61; entries 62 and 63 map to themselves.
 * NOTE(review): the name suggests "next state after a most-probable
 * symbol" -- confirm against the instruction helper.
 */
const uint8_t AC_next_state_MPS_64[64] = {
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
    51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    61, 62, 62, 63
};
109  
110  
/*
 * 64-entry state transition table, used by the cabacdecbin instruction.
 * Every entry is less than or equal to its own index; entry 63 maps to 63.
 * NOTE(review): the name suggests "next state after a least-probable
 * symbol" -- confirm against the instruction helper.
 */
const uint8_t AC_next_state_LPS_64[64] = {
    0, 0, 1, 2, 2, 4, 4, 5, 6, 7,
    8, 9, 9, 11, 11, 12, 13, 13, 15, 15,
    16, 16, 18, 18, 19, 19, 21, 21, 22, 22,
    23, 24, 24, 25, 26, 26, 27, 27, 28, 29,
    29, 30, 30, 30, 31, 32, 32, 33, 33, 33,
    34, 34, 35, 35, 35, 36, 36, 36, 37, 37,
    37, 38, 38, 63
};
120  
/*
 * 64-bit masks used by interleave() and deinterleave().  Each one keeps
 * the low half of every group of 2, 4, 8, 16, 32 and 64 bits respectively.
 */
#define BITS_MASK_8 0x5555555555555555ULL   /* low bit of each bit pair */
#define PAIR_MASK_8 0x3333333333333333ULL   /* low pair of each nibble */
#define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL   /* low nibble of each byte */
#define BYTE_MASK_8 0x00ff00ff00ff00ffULL   /* low byte of each halfword */
#define HALF_MASK_8 0x0000ffff0000ffffULL   /* low halfword of each word */
#define WORD_MASK_8 0x00000000ffffffffULL   /* low word */
127  
interleave(uint32_t odd,uint32_t even)128  uint64_t interleave(uint32_t odd, uint32_t even)
129  {
130      /* Convert to long long */
131      uint64_t myodd = odd;
132      uint64_t myeven = even;
133      /* First, spread bits out */
134      myodd = (myodd | (myodd << 16)) & HALF_MASK_8;
135      myeven = (myeven | (myeven << 16)) & HALF_MASK_8;
136      myodd = (myodd | (myodd << 8)) & BYTE_MASK_8;
137      myeven = (myeven | (myeven << 8)) & BYTE_MASK_8;
138      myodd = (myodd | (myodd << 4)) & NYBL_MASK_8;
139      myeven = (myeven | (myeven << 4)) & NYBL_MASK_8;
140      myodd = (myodd | (myodd << 2)) & PAIR_MASK_8;
141      myeven = (myeven | (myeven << 2)) & PAIR_MASK_8;
142      myodd = (myodd | (myodd << 1)) & BITS_MASK_8;
143      myeven = (myeven | (myeven << 1)) & BITS_MASK_8;
144      /* Now OR together */
145      return myeven | (myodd << 1);
146  }
147  
deinterleave(uint64_t src)148  uint64_t deinterleave(uint64_t src)
149  {
150      /* Get odd and even bits */
151      uint64_t myodd = ((src >> 1) & BITS_MASK_8);
152      uint64_t myeven = (src & BITS_MASK_8);
153  
154      /* Unspread bits */
155      myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8;
156      myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8;
157      myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8;
158      myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8;
159      myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8;
160      myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8;
161      myeven = (myeven | (myeven >> 8)) & HALF_MASK_8;
162      myodd = (myodd | (myodd >> 8)) & HALF_MASK_8;
163      myeven = (myeven | (myeven >> 16)) & WORD_MASK_8;
164      myodd = (myodd | (myodd >> 16)) & WORD_MASK_8;
165  
166      /* Return odd bits in upper half */
167      return myeven | (myodd << 32);
168  }
169  
/*
 * Arithmetic right shift of a by n bits with round-to-nearest, where an
 * exact tie is rounded to the even result.
 *
 * a: value to shift, treated as signed.
 * n: shift count.  n == 0 returns a unchanged.  Counts above 31 are not
 *    supported because the bit masks below are built in 32 bits.
 *
 * Returns the rounded quotient a / 2^n, truncated to 32 bits.
 *
 * The masks use unsigned arithmetic so that n == 31 does not shift a 1
 * into the sign bit of a signed int, which ISO C leaves undefined.
 */
int32_t conv_round(int32_t a, int n)
{
    int64_t val = a;    /* widen first so the rounding add cannot overflow */

    if (n != 0) {
        const uint32_t bits = (uint32_t)a;
        const uint32_t half = UINT32_C(1) << (n - 1);

        if ((bits & (half - 1)) == 0) {
            /*
             * Bits n-2..0 are all zero: the discarded fraction is exactly
             * 0 or 1/2.  Add the LSB of the integer part, moved down to
             * the rounding position, so a tie only rounds up when the
             * integer part is odd.
             */
            val += (bits & (UINT32_C(1) << n)) >> 1;
        } else {
            /* Not a tie: adding one half rounds to nearest */
            val += half;
        }
    }

    return (int32_t)(val >> n);
}
186  
187  /* Floating Point Stuff */
188  
/*
 * Softfloat rounding mode for each value of the USR.FPRND field; indexed
 * by that field in arch_fpop_start().
 */
static const FloatRoundMode softfloat_roundingmodes[] = {
    float_round_nearest_even,   /* FPRND == 0 */
    float_round_to_zero,        /* FPRND == 1 */
    float_round_down,           /* FPRND == 2 */
    float_round_up,             /* FPRND == 3 */
};
195  
/*
 * Prepare env->fp_status for a floating point operation: clear the
 * accumulated softfloat exception flags and select the rounding mode
 * named by the USR.FPRND field.
 */
void arch_fpop_start(CPUHexagonState *env)
{
    float_status *status = &env->fp_status;
    FloatRoundMode mode;

    set_float_exception_flags(0, status);
    mode = softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)];
    set_float_rounding_mode(mode, status);
}
203  
#ifdef CONFIG_USER_ONLY
/*
 * Hexagon Linux kernel only sets the relevant bits in USR (user status
 * register).  The exception isn't raised to user mode, so we don't
 * model it in qemu user mode.
 *
 * RAISE_FP_EXCEPTION therefore expands to an empty statement here.  This
 * file provides no definition when CONFIG_USER_ONLY is not set, so such a
 * build needs one from elsewhere for SOFTFLOAT_TEST_FLAG to compile.
 */
#define RAISE_FP_EXCEPTION   do {} while (0)
#endif
212  
/*
 * SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE)
 *
 * If any bit of FLAG is set in the local variable 'flags' and the sticky
 * USR field USR_<MYF> is still clear, set that field and, when the enable
 * field USR_<MYE> is non-zero, invoke RAISE_FP_EXCEPTION.  Nothing happens
 * when USR_<MYF> is already set.
 *
 * The expansion captures 'flags' from the calling scope, plus whatever
 * identifiers GET_USR_FIELD / SET_USR_FIELD reference.
 * FLAG is parenthesized so that a compound expression such as (A | B) is
 * tested as a whole.
 */
#define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \
    do { \
        if (flags & (FLAG)) { \
            if (GET_USR_FIELD(USR_##MYF) == 0) { \
                SET_USR_FIELD(USR_##MYF, 1); \
                if (GET_USR_FIELD(USR_##MYE)) { \
                    RAISE_FP_EXCEPTION; \
                } \
            } \
        } \
    } while (0)
224  
/*
 * Finish a floating point operation: copy the softfloat exception flags
 * accumulated in env->fp_status into the matching sticky USR flag fields.
 *
 * Each SOFTFLOAT_TEST_FLAG line sets its USR flag field only if it is not
 * already set, and invokes RAISE_FP_EXCEPTION when the paired enable
 * field is set.
 */
void arch_fpop_end(CPUHexagonState *env)
{
    /*
     * Not referenced directly in this function.
     * NOTE(review): presumably picked up by name inside the USR field
     * macros -- confirm in macros.h before renaming or removing.
     */
    const bool pkt_need_commit = true;
    /* SOFTFLOAT_TEST_FLAG reads this local by name */
    int flags = get_float_exception_flags(&env->fp_status);

    if (flags == 0) {
        /* Nothing was raised, USR stays untouched */
        return;
    }

    SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);
    SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE);
    SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE);
    SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE);
    SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE);
}
237  
/*
 * Pre-screen a numerator (*Rs) / denominator (*Rt) pair.
 *
 * Special operands (NaN, infinity, zero) are replaced by fixed-up values
 * and *Rd receives a fixed value (NaN or 1.0); the function then
 * returns 0.  For all other operands the pair may be rescaled with
 * float32_scalbn() depending on the raw exponents, *Rd is set to 0 and
 * the function returns 1.
 *
 * Rs, Rt:    in/out operands, overwritten with the fixed-up or rescaled
 *            values.
 * Rd:        out, see above.
 * adjust:    out, 0x80 when the quotient exponent is near underflow, 0x40
 *            when it is near overflow, 0 otherwise.
 * fp_status: softfloat status; invalid / divide-by-zero flags are raised
 *            here as required, and float32_scalbn() may raise more.
 */
int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
                         float_status *fp_status)
{
    float32 num = *Rs;
    float32 den = *Rt;
    float32 out;
    int pe = 0;
    int is_ordinary = 0;
    const bool num_is_nan = float32_is_any_nan(num);
    const bool den_is_nan = float32_is_any_nan(den);

    if (num_is_nan || den_is_nan) {
        /* A NaN operand with bit 22 clear raises invalid */
        const bool num_sig = num_is_nan && extract32(num, 22, 1) == 0;
        const bool den_sig = den_is_nan && extract32(den, 22, 1) == 0;

        if (num_sig || den_sig) {
            float_raise(float_flag_invalid, fp_status);
        }
        out = num = den = float32_nan;
    } else if ((float32_is_infinity(num) && float32_is_infinity(den)) ||
               (float32_is_zero(num) && float32_is_zero(den))) {
        /* Inf / Inf and 0 / 0 are invalid */
        float_raise(float_flag_invalid, fp_status);
        out = num = den = float32_nan;
    } else if (float32_is_zero(den)) {
        /* x / 0: signed infinity; divide-by-zero unless x is infinite */
        const uint8_t sign = float32_is_neg(num) ^ float32_is_neg(den);

        /* Test num before it is overwritten below */
        if (!float32_is_infinity(num)) {
            float_raise(float_flag_divbyzero, fp_status);
        }
        num = infinite_float32(sign);
        den = float32_one;
        out = float32_one;
    } else if (float32_is_infinity(den) || float32_is_zero(num)) {
        /* x / Inf and 0 / x: zero carrying the XOR of the operand signs */
        num = make_float32(0x80000000 & (num ^ den));
        den = float32_one;
        out = float32_one;
    } else if (float32_is_infinity(num)) {
        /* Inf / x: infinity carrying the XOR of the operand signs */
        const uint8_t sign = float32_is_neg(num) ^ float32_is_neg(den);

        num = infinite_float32(sign);
        den = float32_one;
        out = float32_one;
    } else {
        /* Basic checks passed */
        const int n_exp = float32_getexp_raw(num);
        const int d_exp = float32_getexp_raw(den);
        const int q_exp = n_exp - d_exp + SF_BIAS;
        int num_shift = 0;
        int den_shift = 0;

        if (q_exp <= SF_MANTBITS) {
            /* Near quotient underflow / inexact Q */
            pe = 0x80;
            num_shift = 64;
            den_shift = -64;
        } else if (q_exp > (SF_MAXEXP - 24)) {
            /* Near quotient overflow */
            pe = 0x40;
            num_shift = -32;
            den_shift = 32;
        } else if (n_exp <= SF_MANTBITS + 2) {
            num_shift = 64;
            den_shift = 64;
        } else if (d_exp <= 1) {
            num_shift = 32;
            den_shift = 32;
        } else if (d_exp > 252) {
            num_shift = -32;
            den_shift = -32;
        }

        /* Every rescaling case shifts both operands; otherwise skip */
        if (den_shift != 0) {
            den = float32_scalbn(den, den_shift, fp_status);
            num = float32_scalbn(num, num_shift, fp_status);
        }
        out = 0;
        is_ordinary = 1;
    }

    *Rs = num;
    *Rt = den;
    *Rd = out;
    *adjust = pe;
    return is_ordinary;
}
333  
/*
 * Pre-screen the operand (*Rs) of an inverse square root.
 *
 * NaN, negative non-zero, infinite and zero operands get a fixed value in
 * *Rd (and, except for zero, in *Rs) and the function returns 0.  For
 * any other operand *Rd is set to 0, the operand is scaled up by 2^64
 * when float32_getexp() reports an exponent of 24 or less, and the
 * function returns 1.
 *
 * Rs:        in/out operand.
 * Rd:        out, see above.
 * adjust:    out, 0xe0 when the operand was scaled up, 0 otherwise.
 * fp_status: softfloat status; the invalid flag is raised here for a NaN
 *            with bit 22 clear and for negative non-zero operands.
 */
int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
                           float_status *fp_status)
{
    float32 operand = *Rs;
    float32 out;
    int pe = 0;
    int is_ordinary = 0;

    if (float32_is_any_nan(operand)) {
        if (extract32(operand, 22, 1) == 0) {
            float_raise(float_flag_invalid, fp_status);
        }
        out = operand = float32_nan;
    } else if (float32_lt(operand, float32_zero, fp_status)) {
        /* Negative nonzero values are NaN */
        float_raise(float_flag_invalid, fp_status);
        out = operand = float32_nan;
    } else if (float32_is_infinity(operand)) {
        out = operand = infinite_float32(1);
    } else if (float32_is_zero(operand)) {
        /* The operand itself is passed back unchanged */
        out = float32_one;
    } else {
        /* Basic checks passed */
        if (float32_getexp(operand) <= 24) {
            operand = float32_scalbn(operand, 64, fp_status);
            pe = 0xe0;
        }
        out = 0;
        is_ordinary = 1;
    }

    *Rs = operand;
    *Rd = out;
    *adjust = pe;
    return is_ordinary;
}
375  
/*
 * 128-entry byte table.
 * NOTE(review): presumably the mantissa seed table for the reciprocal
 * approximation; the code that indexes it is not in this file -- confirm
 * against its user.
 */
const uint8_t recip_lookup_table[128] = {
    0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4,
    0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9,
    0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1,
    0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b,
    0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087,
    0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075,
    0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065,
    0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056,
    0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049,
    0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c,
    0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030,
    0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025,
    0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b,
    0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012,
    0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
    0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000,
};
394  
/*
 * 128-entry byte table.
 * NOTE(review): presumably the mantissa seed table for the inverse
 * square root approximation; the code that indexes it is not in this
 * file -- confirm against its user.
 */
const uint8_t invsqrt_lookup_table[128] = {
    0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057,
    0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045,
    0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036,
    0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028,
    0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d,
    0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012,
    0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
    0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001,
    0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4,
    0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb,
    0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6,
    0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3,
    0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093,
    0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084,
    0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077,
    0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b,
};
413