1 /* 2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include "qemu/osdep.h" 19 #include "fpu/softfloat.h" 20 #include "cpu.h" 21 #include "fma_emu.h" 22 #include "arch.h" 23 #include "macros.h" 24 25 #define SF_BIAS 127 26 #define SF_MAXEXP 254 27 #define SF_MANTBITS 23 28 #define float32_nan make_float32(0xffffffff) 29 30 /* 31 * These three tables are used by the cabacdecbin instruction 32 */ 33 const uint8_t rLPS_table_64x4[64][4] = { 34 {128, 176, 208, 240}, 35 {128, 167, 197, 227}, 36 {128, 158, 187, 216}, 37 {123, 150, 178, 205}, 38 {116, 142, 169, 195}, 39 {111, 135, 160, 185}, 40 {105, 128, 152, 175}, 41 {100, 122, 144, 166}, 42 {95, 116, 137, 158}, 43 {90, 110, 130, 150}, 44 {85, 104, 123, 142}, 45 {81, 99, 117, 135}, 46 {77, 94, 111, 128}, 47 {73, 89, 105, 122}, 48 {69, 85, 100, 116}, 49 {66, 80, 95, 110}, 50 {62, 76, 90, 104}, 51 {59, 72, 86, 99}, 52 {56, 69, 81, 94}, 53 {53, 65, 77, 89}, 54 {51, 62, 73, 85}, 55 {48, 59, 69, 80}, 56 {46, 56, 66, 76}, 57 {43, 53, 63, 72}, 58 {41, 50, 59, 69}, 59 {39, 48, 56, 65}, 60 {37, 45, 54, 62}, 61 {35, 43, 51, 59}, 62 {33, 41, 48, 56}, 63 {32, 39, 46, 53}, 64 {30, 37, 43, 50}, 65 {29, 35, 41, 48}, 66 {27, 33, 39, 45}, 67 {26, 31, 37, 43}, 68 {24, 30, 35, 41}, 69 {23, 28, 33, 39}, 70 {22, 27, 32, 37}, 71 {21, 26, 30, 35}, 72 {20, 24, 29, 33}, 73 {19, 23, 27, 31}, 74 {18, 22, 26, 30}, 75 {17, 21, 25, 28}, 76 {16, 20, 23, 27}, 77 {15, 19, 22, 25}, 78 {14, 18, 21, 24}, 79 {14, 17, 20, 23}, 80 {13, 16, 19, 22}, 81 {12, 15, 18, 21}, 82 {12, 14, 17, 20}, 83 {11, 14, 16, 19}, 84 {11, 13, 15, 18}, 85 {10, 12, 15, 17}, 86 {10, 12, 14, 16}, 87 {9, 11, 13, 15}, 88 {9, 11, 12, 14}, 89 {8, 10, 12, 14}, 90 {8, 9, 11, 13}, 91 {7, 9, 11, 12}, 92 {7, 9, 10, 12}, 93 {7, 8, 10, 11}, 94 {6, 8, 9, 11}, 95 {6, 7, 9, 10}, 96 {6, 7, 8, 9}, 97 {2, 2, 2, 2} 98 }; 99 100 const uint8_t AC_next_state_MPS_64[64] = { 101 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 102 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 103 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 104 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 105 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 106 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 107 61, 62, 62, 63 108 }; 109 110 111 const uint8_t AC_next_state_LPS_64[64] = { 112 0, 0, 1, 2, 2, 4, 4, 5, 6, 7, 113 8, 9, 9, 11, 11, 12, 13, 13, 15, 15, 114 16, 16, 18, 18, 19, 19, 21, 21, 22, 22, 115 23, 24, 24, 25, 26, 26, 27, 27, 28, 29, 116 29, 30, 30, 30, 31, 32, 32, 33, 33, 33, 117 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 118 37, 38, 38, 63 119 }; 120 121 #define BITS_MASK_8 0x5555555555555555ULL 122 #define PAIR_MASK_8 0x3333333333333333ULL 123 #define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL 124 #define BYTE_MASK_8 0x00ff00ff00ff00ffULL 125 #define HALF_MASK_8 0x0000ffff0000ffffULL 126 #define WORD_MASK_8 0x00000000ffffffffULL 127 128 uint64_t interleave(uint32_t odd, uint32_t even) 129 { 130 /* Convert to long long */ 131 uint64_t myodd = odd; 132 uint64_t myeven = even; 133 /* First, spread bits out */ 134 myodd = (myodd | (myodd << 16)) & HALF_MASK_8; 135 myeven = (myeven | (myeven << 16)) & HALF_MASK_8; 136 myodd = (myodd | (myodd << 8)) & BYTE_MASK_8; 137 myeven = (myeven | (myeven << 8)) & BYTE_MASK_8; 138 myodd = (myodd | (myodd << 4)) & NYBL_MASK_8; 139 myeven = (myeven | (myeven << 4)) & NYBL_MASK_8; 140 myodd = (myodd | (myodd << 2)) & PAIR_MASK_8; 141 myeven = (myeven | (myeven << 2)) & PAIR_MASK_8; 142 myodd = (myodd | (myodd << 1)) & BITS_MASK_8; 143 myeven = (myeven | (myeven << 1)) & BITS_MASK_8; 144 /* Now OR together */ 145 return myeven | (myodd << 1); 146 } 147 148 uint64_t deinterleave(uint64_t src) 149 { 150 /* Get odd and even bits */ 151 uint64_t myodd = ((src >> 1) & BITS_MASK_8); 152 uint64_t myeven = (src & BITS_MASK_8); 153 154 /* Unspread bits */ 155 myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8; 156 myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8; 157 myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8; 158 myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8; 159 myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8; 160 myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8; 161 myeven = (myeven | (myeven >> 8)) & HALF_MASK_8; 162 myodd = (myodd | (myodd >> 8)) & HALF_MASK_8; 163 myeven = (myeven | (myeven >> 16)) & WORD_MASK_8; 164 myodd = (myodd | (myodd >> 16)) & WORD_MASK_8; 165 166 /* Return odd bits in upper half */ 167 return myeven | (myodd << 32); 168 } 169 170 int32_t conv_round(int32_t a, int n) 171 { 172 int64_t val; 173 174 if (n == 0) { 175 val = a; 176 } else if ((a & ((1 << (n - 1)) - 1)) == 0) { /* N-1..0 all zero? */ 177 /* Add LSB from int part */ 178 val = ((fSE32_64(a)) + (int64_t) (((uint32_t) ((1 << n) & a)) >> 1)); 179 } else { 180 val = ((fSE32_64(a)) + (1 << (n - 1))); 181 } 182 183 val = val >> n; 184 return (int32_t)val; 185 } 186 187 /* Floating Point Stuff */ 188 189 static const FloatRoundMode softfloat_roundingmodes[] = { 190 float_round_nearest_even, 191 float_round_to_zero, 192 float_round_down, 193 float_round_up, 194 }; 195 196 void arch_fpop_start(CPUHexagonState *env) 197 { 198 set_float_exception_flags(0, &env->fp_status); 199 set_float_rounding_mode( 200 softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)], 201 &env->fp_status); 202 } 203 204 #ifdef CONFIG_USER_ONLY 205 /* 206 * Hexagon Linux kernel only sets the relevant bits in USR (user status 207 * register). The exception isn't raised to user mode, so we don't 208 * model it in qemu user mode. 209 */ 210 #define RAISE_FP_EXCEPTION do {} while (0) 211 #endif 212 213 #define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \ 214 do { \ 215 if (flags & FLAG) { \ 216 if (GET_USR_FIELD(USR_##MYF) == 0) { \ 217 SET_USR_FIELD(USR_##MYF, 1); \ 218 if (GET_USR_FIELD(USR_##MYE)) { \ 219 RAISE_FP_EXCEPTION; \ 220 } \ 221 } \ 222 } \ 223 } while (0) 224 225 void arch_fpop_end(CPUHexagonState *env) 226 { 227 const bool pkt_need_commit = true; 228 int flags = get_float_exception_flags(&env->fp_status); 229 if (flags != 0) { 230 SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE); 231 SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE); 232 SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE); 233 SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE); 234 SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE); 235 } 236 } 237 238 int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, 239 float_status *fp_status) 240 { 241 int n_exp; 242 int d_exp; 243 int ret = 0; 244 float32 RsV, RtV, RdV; 245 int PeV = 0; 246 RsV = *Rs; 247 RtV = *Rt; 248 if (float32_is_any_nan(RsV) && float32_is_any_nan(RtV)) { 249 if (extract32(RsV & RtV, 22, 1) == 0) { 250 float_raise(float_flag_invalid, fp_status); 251 } 252 RdV = RsV = RtV = float32_nan; 253 } else if (float32_is_any_nan(RsV)) { 254 if (extract32(RsV, 22, 1) == 0) { 255 float_raise(float_flag_invalid, fp_status); 256 } 257 RdV = RsV = RtV = float32_nan; 258 } else if (float32_is_any_nan(RtV)) { 259 /* or put NaN in num/den fixup? */ 260 if (extract32(RtV, 22, 1) == 0) { 261 float_raise(float_flag_invalid, fp_status); 262 } 263 RdV = RsV = RtV = float32_nan; 264 } else if (float32_is_infinity(RsV) && float32_is_infinity(RtV)) { 265 /* or put Inf in num fixup? */ 266 RdV = RsV = RtV = float32_nan; 267 float_raise(float_flag_invalid, fp_status); 268 } else if (float32_is_zero(RsV) && float32_is_zero(RtV)) { 269 /* or put zero in num fixup? */ 270 RdV = RsV = RtV = float32_nan; 271 float_raise(float_flag_invalid, fp_status); 272 } else if (float32_is_zero(RtV)) { 273 /* or put Inf in num fixup? */ 274 uint8_t RsV_sign = float32_is_neg(RsV); 275 uint8_t RtV_sign = float32_is_neg(RtV); 276 /* Check that RsV is NOT infinite before we overwrite it */ 277 if (!float32_is_infinity(RsV)) { 278 float_raise(float_flag_divbyzero, fp_status); 279 } 280 RsV = infinite_float32(RsV_sign ^ RtV_sign); 281 RtV = float32_one; 282 RdV = float32_one; 283 } else if (float32_is_infinity(RtV)) { 284 RsV = make_float32(0x80000000 & (RsV ^ RtV)); 285 RtV = float32_one; 286 RdV = float32_one; 287 } else if (float32_is_zero(RsV)) { 288 /* Does this just work itself out? */ 289 /* No, 0/Inf causes problems. */ 290 RsV = make_float32(0x80000000 & (RsV ^ RtV)); 291 RtV = float32_one; 292 RdV = float32_one; 293 } else if (float32_is_infinity(RsV)) { 294 uint8_t RsV_sign = float32_is_neg(RsV); 295 uint8_t RtV_sign = float32_is_neg(RtV); 296 RsV = infinite_float32(RsV_sign ^ RtV_sign); 297 RtV = float32_one; 298 RdV = float32_one; 299 } else { 300 PeV = 0x00; 301 /* Basic checks passed */ 302 n_exp = float32_getexp_raw(RsV); 303 d_exp = float32_getexp_raw(RtV); 304 if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) { 305 /* Near quotient underflow / inexact Q */ 306 PeV = 0x80; 307 RtV = float32_scalbn(RtV, -64, fp_status); 308 RsV = float32_scalbn(RsV, 64, fp_status); 309 } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) { 310 /* Near quotient overflow */ 311 PeV = 0x40; 312 RtV = float32_scalbn(RtV, 32, fp_status); 313 RsV = float32_scalbn(RsV, -32, fp_status); 314 } else if (n_exp <= SF_MANTBITS + 2) { 315 RtV = float32_scalbn(RtV, 64, fp_status); 316 RsV = float32_scalbn(RsV, 64, fp_status); 317 } else if (d_exp <= 1) { 318 RtV = float32_scalbn(RtV, 32, fp_status); 319 RsV = float32_scalbn(RsV, 32, fp_status); 320 } else if (d_exp > 252) { 321 RtV = float32_scalbn(RtV, -32, fp_status); 322 RsV = float32_scalbn(RsV, -32, fp_status); 323 } 324 RdV = 0; 325 ret = 1; 326 } 327 *Rs = RsV; 328 *Rt = RtV; 329 *Rd = RdV; 330 *adjust = PeV; 331 return ret; 332 } 333 334 int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, 335 float_status *fp_status) 336 { 337 float32 RsV, RdV; 338 int PeV = 0; 339 int r_exp; 340 int ret = 0; 341 RsV = *Rs; 342 if (float32_is_any_nan(RsV)) { 343 if (extract32(RsV, 22, 1) == 0) { 344 float_raise(float_flag_invalid, fp_status); 345 } 346 RdV = RsV = float32_nan; 347 } else if (float32_lt(RsV, float32_zero, fp_status)) { 348 /* Negative nonzero values are NaN */ 349 float_raise(float_flag_invalid, fp_status); 350 RsV = float32_nan; 351 RdV = float32_nan; 352 } else if (float32_is_infinity(RsV)) { 353 /* or put Inf in num fixup? */ 354 RsV = infinite_float32(1); 355 RdV = infinite_float32(1); 356 } else if (float32_is_zero(RsV)) { 357 /* or put zero in num fixup? */ 358 RdV = float32_one; 359 } else { 360 PeV = 0x00; 361 /* Basic checks passed */ 362 r_exp = float32_getexp(RsV); 363 if (r_exp <= 24) { 364 RsV = float32_scalbn(RsV, 64, fp_status); 365 PeV = 0xe0; 366 } 367 RdV = 0; 368 ret = 1; 369 } 370 *Rs = RsV; 371 *Rd = RdV; 372 *adjust = PeV; 373 return ret; 374 } 375 376 const uint8_t recip_lookup_table[128] = { 377 0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4, 378 0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9, 379 0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1, 380 0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b, 381 0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087, 382 0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075, 383 0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065, 384 0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056, 385 0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049, 386 0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c, 387 0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030, 388 0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025, 389 0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b, 390 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012, 391 0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009, 392 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000, 393 }; 394 395 const uint8_t invsqrt_lookup_table[128] = { 396 0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057, 397 0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045, 398 0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036, 399 0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028, 400 0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d, 401 0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012, 402 0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009, 403 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001, 404 0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4, 405 0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb, 406 0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6, 407 0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3, 408 0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093, 409 0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084, 410 0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077, 411 0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b, 412 }; 413