1 /* 2 * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include "qemu/osdep.h" 19 #include "fpu/softfloat.h" 20 #include "cpu.h" 21 #include "fma_emu.h" 22 #include "arch.h" 23 #include "macros.h" 24 25 #define SF_BIAS 127 26 #define SF_MAXEXP 254 27 #define SF_MANTBITS 23 28 #define float32_nan make_float32(0xffffffff) 29 30 /* 31 * These three tables are used by the cabacdecbin instruction 32 */ 33 const uint8_t rLPS_table_64x4[64][4] = { 34 {128, 176, 208, 240}, 35 {128, 167, 197, 227}, 36 {128, 158, 187, 216}, 37 {123, 150, 178, 205}, 38 {116, 142, 169, 195}, 39 {111, 135, 160, 185}, 40 {105, 128, 152, 175}, 41 {100, 122, 144, 166}, 42 {95, 116, 137, 158}, 43 {90, 110, 130, 150}, 44 {85, 104, 123, 142}, 45 {81, 99, 117, 135}, 46 {77, 94, 111, 128}, 47 {73, 89, 105, 122}, 48 {69, 85, 100, 116}, 49 {66, 80, 95, 110}, 50 {62, 76, 90, 104}, 51 {59, 72, 86, 99}, 52 {56, 69, 81, 94}, 53 {53, 65, 77, 89}, 54 {51, 62, 73, 85}, 55 {48, 59, 69, 80}, 56 {46, 56, 66, 76}, 57 {43, 53, 63, 72}, 58 {41, 50, 59, 69}, 59 {39, 48, 56, 65}, 60 {37, 45, 54, 62}, 61 {35, 43, 51, 59}, 62 {33, 41, 48, 56}, 63 {32, 39, 46, 53}, 64 {30, 37, 43, 50}, 65 {29, 35, 41, 48}, 66 {27, 33, 39, 45}, 67 {26, 31, 37, 43}, 68 {24, 30, 35, 41}, 69 {23, 28, 33, 39}, 70 {22, 27, 32, 37}, 71 {21, 26, 30, 35}, 72 {20, 24, 29, 33}, 73 {19, 23, 27, 31}, 74 {18, 22, 26, 30}, 75 {17, 21, 25, 28}, 76 {16, 20, 23, 27}, 77 {15, 19, 22, 25}, 78 {14, 18, 21, 24}, 79 {14, 17, 20, 23}, 80 {13, 16, 19, 22}, 81 {12, 15, 18, 21}, 82 {12, 14, 17, 20}, 83 {11, 14, 16, 19}, 84 {11, 13, 15, 18}, 85 {10, 12, 15, 17}, 86 {10, 12, 14, 16}, 87 {9, 11, 13, 15}, 88 {9, 11, 12, 14}, 89 {8, 10, 12, 14}, 90 {8, 9, 11, 13}, 91 {7, 9, 11, 12}, 92 {7, 9, 10, 12}, 93 {7, 8, 10, 11}, 94 {6, 8, 9, 11}, 95 {6, 7, 9, 10}, 96 {6, 7, 8, 9}, 97 {2, 2, 2, 2} 98 }; 99 100 const uint8_t AC_next_state_MPS_64[64] = { 101 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 102 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 103 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 104 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 105 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 106 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 107 61, 62, 62, 63 108 }; 109 110 111 const uint8_t AC_next_state_LPS_64[64] = { 112 0, 0, 1, 2, 2, 4, 4, 5, 6, 7, 113 8, 9, 9, 11, 11, 12, 13, 13, 15, 15, 114 16, 16, 18, 18, 19, 19, 21, 21, 22, 22, 115 23, 24, 24, 25, 26, 26, 27, 27, 28, 29, 116 29, 30, 30, 30, 31, 32, 32, 33, 33, 33, 117 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 118 37, 38, 38, 63 119 }; 120 121 #define BITS_MASK_8 0x5555555555555555ULL 122 #define PAIR_MASK_8 0x3333333333333333ULL 123 #define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL 124 #define BYTE_MASK_8 0x00ff00ff00ff00ffULL 125 #define HALF_MASK_8 0x0000ffff0000ffffULL 126 #define WORD_MASK_8 0x00000000ffffffffULL 127 128 uint64_t interleave(uint32_t odd, uint32_t even) 129 { 130 /* Convert to long long */ 131 uint64_t myodd = odd; 132 uint64_t myeven = even; 133 /* First, spread bits out */ 134 myodd = (myodd | (myodd << 16)) & HALF_MASK_8; 135 myeven = (myeven | (myeven << 16)) & HALF_MASK_8; 136 myodd = (myodd | (myodd << 8)) & BYTE_MASK_8; 137 myeven = (myeven | (myeven << 8)) & BYTE_MASK_8; 138 myodd = (myodd | (myodd << 4)) & NYBL_MASK_8; 139 myeven = (myeven | (myeven << 4)) & NYBL_MASK_8; 140 myodd = (myodd | (myodd << 2)) & PAIR_MASK_8; 141 myeven = (myeven | (myeven << 2)) & PAIR_MASK_8; 142 myodd = (myodd | (myodd << 1)) & BITS_MASK_8; 143 myeven = (myeven | (myeven << 1)) & BITS_MASK_8; 144 /* Now OR together */ 145 return myeven | (myodd << 1); 146 } 147 148 uint64_t deinterleave(uint64_t src) 149 { 150 /* Get odd and even bits */ 151 uint64_t myodd = ((src >> 1) & BITS_MASK_8); 152 uint64_t myeven = (src & BITS_MASK_8); 153 154 /* Unspread bits */ 155 myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8; 156 myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8; 157 myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8; 158 myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8; 159 myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8; 160 myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8; 161 myeven = (myeven | (myeven >> 8)) & HALF_MASK_8; 162 myodd = (myodd | (myodd >> 8)) & HALF_MASK_8; 163 myeven = (myeven | (myeven >> 16)) & WORD_MASK_8; 164 myodd = (myodd | (myodd >> 16)) & WORD_MASK_8; 165 166 /* Return odd bits in upper half */ 167 return myeven | (myodd << 32); 168 } 169 170 int32_t conv_round(int32_t a, int n) 171 { 172 int64_t val; 173 174 if (n == 0) { 175 val = a; 176 } else if ((a & ((1 << (n - 1)) - 1)) == 0) { /* N-1..0 all zero? */ 177 /* Add LSB from int part */ 178 val = ((fSE32_64(a)) + (int64_t) (((uint32_t) ((1 << n) & a)) >> 1)); 179 } else { 180 val = ((fSE32_64(a)) + (1 << (n - 1))); 181 } 182 183 val = val >> n; 184 return (int32_t)val; 185 } 186 187 /* Floating Point Stuff */ 188 189 static const FloatRoundMode softfloat_roundingmodes[] = { 190 float_round_nearest_even, 191 float_round_to_zero, 192 float_round_down, 193 float_round_up, 194 }; 195 196 void arch_fpop_start(CPUHexagonState *env) 197 { 198 set_float_exception_flags(0, &env->fp_status); 199 set_float_rounding_mode( 200 softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)], 201 &env->fp_status); 202 } 203 204 #ifdef CONFIG_USER_ONLY 205 /* 206 * Hexagon Linux kernel only sets the relevant bits in USR (user status 207 * register). The exception isn't raised to user mode, so we don't 208 * model it in qemu user mode. 209 */ 210 #define RAISE_FP_EXCEPTION do {} while (0) 211 #endif 212 213 #define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \ 214 do { \ 215 if (flags & FLAG) { \ 216 if (GET_USR_FIELD(USR_##MYF) == 0) { \ 217 SET_USR_FIELD(USR_##MYF, 1); \ 218 if (GET_USR_FIELD(USR_##MYE)) { \ 219 RAISE_FP_EXCEPTION; \ 220 } \ 221 } \ 222 } \ 223 } while (0) 224 225 void arch_fpop_end(CPUHexagonState *env) 226 { 227 int flags = get_float_exception_flags(&env->fp_status); 228 if (flags != 0) { 229 SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE); 230 SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE); 231 SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE); 232 SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE); 233 SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE); 234 } 235 } 236 237 int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, 238 float_status *fp_status) 239 { 240 int n_exp; 241 int d_exp; 242 int ret = 0; 243 float32 RsV, RtV, RdV; 244 int PeV = 0; 245 RsV = *Rs; 246 RtV = *Rt; 247 if (float32_is_any_nan(RsV) && float32_is_any_nan(RtV)) { 248 if (extract32(RsV & RtV, 22, 1) == 0) { 249 float_raise(float_flag_invalid, fp_status); 250 } 251 RdV = RsV = RtV = float32_nan; 252 } else if (float32_is_any_nan(RsV)) { 253 if (extract32(RsV, 22, 1) == 0) { 254 float_raise(float_flag_invalid, fp_status); 255 } 256 RdV = RsV = RtV = float32_nan; 257 } else if (float32_is_any_nan(RtV)) { 258 /* or put NaN in num/den fixup? */ 259 if (extract32(RtV, 22, 1) == 0) { 260 float_raise(float_flag_invalid, fp_status); 261 } 262 RdV = RsV = RtV = float32_nan; 263 } else if (float32_is_infinity(RsV) && float32_is_infinity(RtV)) { 264 /* or put Inf in num fixup? */ 265 RdV = RsV = RtV = float32_nan; 266 float_raise(float_flag_invalid, fp_status); 267 } else if (float32_is_zero(RsV) && float32_is_zero(RtV)) { 268 /* or put zero in num fixup? */ 269 RdV = RsV = RtV = float32_nan; 270 float_raise(float_flag_invalid, fp_status); 271 } else if (float32_is_zero(RtV)) { 272 /* or put Inf in num fixup? */ 273 uint8_t RsV_sign = float32_is_neg(RsV); 274 uint8_t RtV_sign = float32_is_neg(RtV); 275 /* Check that RsV is NOT infinite before we overwrite it */ 276 if (!float32_is_infinity(RsV)) { 277 float_raise(float_flag_divbyzero, fp_status); 278 } 279 RsV = infinite_float32(RsV_sign ^ RtV_sign); 280 RtV = float32_one; 281 RdV = float32_one; 282 } else if (float32_is_infinity(RtV)) { 283 RsV = make_float32(0x80000000 & (RsV ^ RtV)); 284 RtV = float32_one; 285 RdV = float32_one; 286 } else if (float32_is_zero(RsV)) { 287 /* Does this just work itself out? */ 288 /* No, 0/Inf causes problems. */ 289 RsV = make_float32(0x80000000 & (RsV ^ RtV)); 290 RtV = float32_one; 291 RdV = float32_one; 292 } else if (float32_is_infinity(RsV)) { 293 uint8_t RsV_sign = float32_is_neg(RsV); 294 uint8_t RtV_sign = float32_is_neg(RtV); 295 RsV = infinite_float32(RsV_sign ^ RtV_sign); 296 RtV = float32_one; 297 RdV = float32_one; 298 } else { 299 PeV = 0x00; 300 /* Basic checks passed */ 301 n_exp = float32_getexp_raw(RsV); 302 d_exp = float32_getexp_raw(RtV); 303 if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) { 304 /* Near quotient underflow / inexact Q */ 305 PeV = 0x80; 306 RtV = float32_scalbn(RtV, -64, fp_status); 307 RsV = float32_scalbn(RsV, 64, fp_status); 308 } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) { 309 /* Near quotient overflow */ 310 PeV = 0x40; 311 RtV = float32_scalbn(RtV, 32, fp_status); 312 RsV = float32_scalbn(RsV, -32, fp_status); 313 } else if (n_exp <= SF_MANTBITS + 2) { 314 RtV = float32_scalbn(RtV, 64, fp_status); 315 RsV = float32_scalbn(RsV, 64, fp_status); 316 } else if (d_exp <= 1) { 317 RtV = float32_scalbn(RtV, 32, fp_status); 318 RsV = float32_scalbn(RsV, 32, fp_status); 319 } else if (d_exp > 252) { 320 RtV = float32_scalbn(RtV, -32, fp_status); 321 RsV = float32_scalbn(RsV, -32, fp_status); 322 } 323 RdV = 0; 324 ret = 1; 325 } 326 *Rs = RsV; 327 *Rt = RtV; 328 *Rd = RdV; 329 *adjust = PeV; 330 return ret; 331 } 332 333 int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, 334 float_status *fp_status) 335 { 336 float32 RsV, RdV; 337 int PeV = 0; 338 int r_exp; 339 int ret = 0; 340 RsV = *Rs; 341 if (float32_is_any_nan(RsV)) { 342 if (extract32(RsV, 22, 1) == 0) { 343 float_raise(float_flag_invalid, fp_status); 344 } 345 RdV = RsV = float32_nan; 346 } else if (float32_lt(RsV, float32_zero, fp_status)) { 347 /* Negative nonzero values are NaN */ 348 float_raise(float_flag_invalid, fp_status); 349 RsV = float32_nan; 350 RdV = float32_nan; 351 } else if (float32_is_infinity(RsV)) { 352 /* or put Inf in num fixup? */ 353 RsV = infinite_float32(1); 354 RdV = infinite_float32(1); 355 } else if (float32_is_zero(RsV)) { 356 /* or put zero in num fixup? */ 357 RdV = float32_one; 358 } else { 359 PeV = 0x00; 360 /* Basic checks passed */ 361 r_exp = float32_getexp(RsV); 362 if (r_exp <= 24) { 363 RsV = float32_scalbn(RsV, 64, fp_status); 364 PeV = 0xe0; 365 } 366 RdV = 0; 367 ret = 1; 368 } 369 *Rs = RsV; 370 *Rd = RdV; 371 *adjust = PeV; 372 return ret; 373 } 374 375 const uint8_t recip_lookup_table[128] = { 376 0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4, 377 0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9, 378 0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1, 379 0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b, 380 0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087, 381 0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075, 382 0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065, 383 0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056, 384 0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049, 385 0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c, 386 0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030, 387 0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025, 388 0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b, 389 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012, 390 0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009, 391 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000, 392 }; 393 394 const uint8_t invsqrt_lookup_table[128] = { 395 0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057, 396 0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045, 397 0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036, 398 0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028, 399 0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d, 400 0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012, 401 0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009, 402 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001, 403 0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4, 404 0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb, 405 0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6, 406 0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3, 407 0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093, 408 0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084, 409 0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077, 410 0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b, 411 }; 412