1 /* 2 * ARM generic vector expansion 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2005-2007 CodeSourcery 6 * Copyright (c) 2007 OpenedHand, Ltd. 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 20 */ 21 22 #include "qemu/osdep.h" 23 #include "translate.h" 24 25 26 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, 27 uint32_t opr_sz, uint32_t max_sz, 28 gen_helper_gvec_3_ptr *fn) 29 { 30 TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 31 32 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 33 tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 34 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, 35 opr_sz, max_sz, 0, fn); 36 } 37 38 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 39 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 40 { 41 static gen_helper_gvec_3_ptr * const fns[2] = { 42 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 43 }; 44 tcg_debug_assert(vece >= 1 && vece <= 2); 45 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 46 } 47 48 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 49 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 50 { 51 static gen_helper_gvec_3_ptr * const fns[2] = { 52 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 53 }; 54 tcg_debug_assert(vece >= 1 && vece <= 2); 55 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 56 } 57 58 #define GEN_CMP0(NAME, COND) \ 59 void NAME(unsigned vece, uint32_t d, uint32_t m, \ 60 uint32_t opr_sz, uint32_t max_sz) \ 61 { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } 62 63 GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) 64 GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) 65 GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) 66 GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) 67 GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) 68 69 #undef GEN_CMP0 70 71 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 72 { 73 tcg_gen_vec_sar8i_i64(a, a, shift); 74 tcg_gen_vec_add8_i64(d, d, a); 75 } 76 77 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 78 { 79 tcg_gen_vec_sar16i_i64(a, a, shift); 80 tcg_gen_vec_add16_i64(d, d, a); 81 } 82 83 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 84 { 85 tcg_gen_sari_i32(a, a, shift); 86 tcg_gen_add_i32(d, d, a); 87 } 88 89 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 90 { 91 tcg_gen_sari_i64(a, a, shift); 92 tcg_gen_add_i64(d, d, a); 93 } 94 95 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 96 { 97 tcg_gen_sari_vec(vece, a, a, sh); 98 tcg_gen_add_vec(vece, d, d, a); 99 } 100 101 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 102 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 103 { 104 static const TCGOpcode vecop_list[] = { 105 INDEX_op_sari_vec, INDEX_op_add_vec, 0 106 }; 107 static const GVecGen2i ops[4] = { 108 { .fni8 = gen_ssra8_i64, 109 .fniv = gen_ssra_vec, 110 .fno = gen_helper_gvec_ssra_b, 111 .load_dest = true, 112 .opt_opc = vecop_list, 113 .vece = MO_8 }, 114 { .fni8 = gen_ssra16_i64, 115 .fniv = gen_ssra_vec, 116 .fno = gen_helper_gvec_ssra_h, 117 .load_dest = true, 118 .opt_opc = vecop_list, 119 .vece = MO_16 }, 120 { .fni4 = gen_ssra32_i32, 121 .fniv = gen_ssra_vec, 122 .fno = gen_helper_gvec_ssra_s, 123 .load_dest = true, 124 .opt_opc = vecop_list, 125 .vece = MO_32 }, 126 { .fni8 = gen_ssra64_i64, 127 .fniv = gen_ssra_vec, 128 .fno = gen_helper_gvec_ssra_d, 129 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 130 .opt_opc = vecop_list, 131 .load_dest = true, 132 .vece = MO_64 }, 133 }; 134 135 /* tszimm encoding produces immediates in the range [1..esize]. */ 136 tcg_debug_assert(shift > 0); 137 tcg_debug_assert(shift <= (8 << vece)); 138 139 /* 140 * Shifts larger than the element size are architecturally valid. 141 * Signed results in all sign bits. 142 */ 143 shift = MIN(shift, (8 << vece) - 1); 144 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 145 } 146 147 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 148 { 149 tcg_gen_vec_shr8i_i64(a, a, shift); 150 tcg_gen_vec_add8_i64(d, d, a); 151 } 152 153 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 154 { 155 tcg_gen_vec_shr16i_i64(a, a, shift); 156 tcg_gen_vec_add16_i64(d, d, a); 157 } 158 159 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 160 { 161 tcg_gen_shri_i32(a, a, shift); 162 tcg_gen_add_i32(d, d, a); 163 } 164 165 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 166 { 167 tcg_gen_shri_i64(a, a, shift); 168 tcg_gen_add_i64(d, d, a); 169 } 170 171 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 172 { 173 tcg_gen_shri_vec(vece, a, a, sh); 174 tcg_gen_add_vec(vece, d, d, a); 175 } 176 177 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 178 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 179 { 180 static const TCGOpcode vecop_list[] = { 181 INDEX_op_shri_vec, INDEX_op_add_vec, 0 182 }; 183 static const GVecGen2i ops[4] = { 184 { .fni8 = gen_usra8_i64, 185 .fniv = gen_usra_vec, 186 .fno = gen_helper_gvec_usra_b, 187 .load_dest = true, 188 .opt_opc = vecop_list, 189 .vece = MO_8, }, 190 { .fni8 = gen_usra16_i64, 191 .fniv = gen_usra_vec, 192 .fno = gen_helper_gvec_usra_h, 193 .load_dest = true, 194 .opt_opc = vecop_list, 195 .vece = MO_16, }, 196 { .fni4 = gen_usra32_i32, 197 .fniv = gen_usra_vec, 198 .fno = gen_helper_gvec_usra_s, 199 .load_dest = true, 200 .opt_opc = vecop_list, 201 .vece = MO_32, }, 202 { .fni8 = gen_usra64_i64, 203 .fniv = gen_usra_vec, 204 .fno = gen_helper_gvec_usra_d, 205 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 206 .load_dest = true, 207 .opt_opc = vecop_list, 208 .vece = MO_64, }, 209 }; 210 211 /* tszimm encoding produces immediates in the range [1..esize]. */ 212 tcg_debug_assert(shift > 0); 213 tcg_debug_assert(shift <= (8 << vece)); 214 215 /* 216 * Shifts larger than the element size are architecturally valid. 217 * Unsigned results in all zeros as input to accumulate: nop. 218 */ 219 if (shift < (8 << vece)) { 220 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 221 } else { 222 /* Nop, but we do need to clear the tail. */ 223 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 224 } 225 } 226 227 /* 228 * Shift one less than the requested amount, and the low bit is 229 * the rounding bit. For the 8 and 16-bit operations, because we 230 * mask the low bit, we can perform a normal integer shift instead 231 * of a vector shift. 232 */ 233 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 234 { 235 TCGv_i64 t = tcg_temp_new_i64(); 236 237 tcg_gen_shri_i64(t, a, sh - 1); 238 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 239 tcg_gen_vec_sar8i_i64(d, a, sh); 240 tcg_gen_vec_add8_i64(d, d, t); 241 } 242 243 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 244 { 245 TCGv_i64 t = tcg_temp_new_i64(); 246 247 tcg_gen_shri_i64(t, a, sh - 1); 248 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 249 tcg_gen_vec_sar16i_i64(d, a, sh); 250 tcg_gen_vec_add16_i64(d, d, t); 251 } 252 253 void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 254 { 255 TCGv_i32 t; 256 257 /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ 258 if (sh == 32) { 259 tcg_gen_movi_i32(d, 0); 260 return; 261 } 262 t = tcg_temp_new_i32(); 263 tcg_gen_extract_i32(t, a, sh - 1, 1); 264 tcg_gen_sari_i32(d, a, sh); 265 tcg_gen_add_i32(d, d, t); 266 } 267 268 void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 269 { 270 TCGv_i64 t = tcg_temp_new_i64(); 271 272 tcg_gen_extract_i64(t, a, sh - 1, 1); 273 tcg_gen_sari_i64(d, a, sh); 274 tcg_gen_add_i64(d, d, t); 275 } 276 277 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 278 { 279 TCGv_vec t = tcg_temp_new_vec_matching(d); 280 TCGv_vec ones = tcg_temp_new_vec_matching(d); 281 282 tcg_gen_shri_vec(vece, t, a, sh - 1); 283 tcg_gen_dupi_vec(vece, ones, 1); 284 tcg_gen_and_vec(vece, t, t, ones); 285 tcg_gen_sari_vec(vece, d, a, sh); 286 tcg_gen_add_vec(vece, d, d, t); 287 } 288 289 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 290 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 291 { 292 static const TCGOpcode vecop_list[] = { 293 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 294 }; 295 static const GVecGen2i ops[4] = { 296 { .fni8 = gen_srshr8_i64, 297 .fniv = gen_srshr_vec, 298 .fno = gen_helper_gvec_srshr_b, 299 .opt_opc = vecop_list, 300 .vece = MO_8 }, 301 { .fni8 = gen_srshr16_i64, 302 .fniv = gen_srshr_vec, 303 .fno = gen_helper_gvec_srshr_h, 304 .opt_opc = vecop_list, 305 .vece = MO_16 }, 306 { .fni4 = gen_srshr32_i32, 307 .fniv = gen_srshr_vec, 308 .fno = gen_helper_gvec_srshr_s, 309 .opt_opc = vecop_list, 310 .vece = MO_32 }, 311 { .fni8 = gen_srshr64_i64, 312 .fniv = gen_srshr_vec, 313 .fno = gen_helper_gvec_srshr_d, 314 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 315 .opt_opc = vecop_list, 316 .vece = MO_64 }, 317 }; 318 319 /* tszimm encoding produces immediates in the range [1..esize] */ 320 tcg_debug_assert(shift > 0); 321 tcg_debug_assert(shift <= (8 << vece)); 322 323 if (shift == (8 << vece)) { 324 /* 325 * Shifts larger than the element size are architecturally valid. 326 * Signed results in all sign bits. With rounding, this produces 327 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 328 * I.e. always zero. 329 */ 330 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); 331 } else { 332 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 333 } 334 } 335 336 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 337 { 338 TCGv_i64 t = tcg_temp_new_i64(); 339 340 gen_srshr8_i64(t, a, sh); 341 tcg_gen_vec_add8_i64(d, d, t); 342 } 343 344 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 345 { 346 TCGv_i64 t = tcg_temp_new_i64(); 347 348 gen_srshr16_i64(t, a, sh); 349 tcg_gen_vec_add16_i64(d, d, t); 350 } 351 352 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 353 { 354 TCGv_i32 t = tcg_temp_new_i32(); 355 356 gen_srshr32_i32(t, a, sh); 357 tcg_gen_add_i32(d, d, t); 358 } 359 360 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 361 { 362 TCGv_i64 t = tcg_temp_new_i64(); 363 364 gen_srshr64_i64(t, a, sh); 365 tcg_gen_add_i64(d, d, t); 366 } 367 368 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 369 { 370 TCGv_vec t = tcg_temp_new_vec_matching(d); 371 372 gen_srshr_vec(vece, t, a, sh); 373 tcg_gen_add_vec(vece, d, d, t); 374 } 375 376 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 377 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 378 { 379 static const TCGOpcode vecop_list[] = { 380 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 381 }; 382 static const GVecGen2i ops[4] = { 383 { .fni8 = gen_srsra8_i64, 384 .fniv = gen_srsra_vec, 385 .fno = gen_helper_gvec_srsra_b, 386 .opt_opc = vecop_list, 387 .load_dest = true, 388 .vece = MO_8 }, 389 { .fni8 = gen_srsra16_i64, 390 .fniv = gen_srsra_vec, 391 .fno = gen_helper_gvec_srsra_h, 392 .opt_opc = vecop_list, 393 .load_dest = true, 394 .vece = MO_16 }, 395 { .fni4 = gen_srsra32_i32, 396 .fniv = gen_srsra_vec, 397 .fno = gen_helper_gvec_srsra_s, 398 .opt_opc = vecop_list, 399 .load_dest = true, 400 .vece = MO_32 }, 401 { .fni8 = gen_srsra64_i64, 402 .fniv = gen_srsra_vec, 403 .fno = gen_helper_gvec_srsra_d, 404 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 405 .opt_opc = vecop_list, 406 .load_dest = true, 407 .vece = MO_64 }, 408 }; 409 410 /* tszimm encoding produces immediates in the range [1..esize] */ 411 tcg_debug_assert(shift > 0); 412 tcg_debug_assert(shift <= (8 << vece)); 413 414 /* 415 * Shifts larger than the element size are architecturally valid. 416 * Signed results in all sign bits. With rounding, this produces 417 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 418 * I.e. always zero. With accumulation, this leaves D unchanged. 419 */ 420 if (shift == (8 << vece)) { 421 /* Nop, but we do need to clear the tail. */ 422 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 423 } else { 424 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 425 } 426 } 427 428 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 429 { 430 TCGv_i64 t = tcg_temp_new_i64(); 431 432 tcg_gen_shri_i64(t, a, sh - 1); 433 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 434 tcg_gen_vec_shr8i_i64(d, a, sh); 435 tcg_gen_vec_add8_i64(d, d, t); 436 } 437 438 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 439 { 440 TCGv_i64 t = tcg_temp_new_i64(); 441 442 tcg_gen_shri_i64(t, a, sh - 1); 443 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 444 tcg_gen_vec_shr16i_i64(d, a, sh); 445 tcg_gen_vec_add16_i64(d, d, t); 446 } 447 448 void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 449 { 450 TCGv_i32 t; 451 452 /* Handle shift by the input size for the benefit of trans_URSHR_ri */ 453 if (sh == 32) { 454 tcg_gen_extract_i32(d, a, sh - 1, 1); 455 return; 456 } 457 t = tcg_temp_new_i32(); 458 tcg_gen_extract_i32(t, a, sh - 1, 1); 459 tcg_gen_shri_i32(d, a, sh); 460 tcg_gen_add_i32(d, d, t); 461 } 462 463 void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 464 { 465 TCGv_i64 t = tcg_temp_new_i64(); 466 467 tcg_gen_extract_i64(t, a, sh - 1, 1); 468 tcg_gen_shri_i64(d, a, sh); 469 tcg_gen_add_i64(d, d, t); 470 } 471 472 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) 473 { 474 TCGv_vec t = tcg_temp_new_vec_matching(d); 475 TCGv_vec ones = tcg_temp_new_vec_matching(d); 476 477 tcg_gen_shri_vec(vece, t, a, shift - 1); 478 tcg_gen_dupi_vec(vece, ones, 1); 479 tcg_gen_and_vec(vece, t, t, ones); 480 tcg_gen_shri_vec(vece, d, a, shift); 481 tcg_gen_add_vec(vece, d, d, t); 482 } 483 484 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 485 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 486 { 487 static const TCGOpcode vecop_list[] = { 488 INDEX_op_shri_vec, INDEX_op_add_vec, 0 489 }; 490 static const GVecGen2i ops[4] = { 491 { .fni8 = gen_urshr8_i64, 492 .fniv = gen_urshr_vec, 493 .fno = gen_helper_gvec_urshr_b, 494 .opt_opc = vecop_list, 495 .vece = MO_8 }, 496 { .fni8 = gen_urshr16_i64, 497 .fniv = gen_urshr_vec, 498 .fno = gen_helper_gvec_urshr_h, 499 .opt_opc = vecop_list, 500 .vece = MO_16 }, 501 { .fni4 = gen_urshr32_i32, 502 .fniv = gen_urshr_vec, 503 .fno = gen_helper_gvec_urshr_s, 504 .opt_opc = vecop_list, 505 .vece = MO_32 }, 506 { .fni8 = gen_urshr64_i64, 507 .fniv = gen_urshr_vec, 508 .fno = gen_helper_gvec_urshr_d, 509 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 510 .opt_opc = vecop_list, 511 .vece = MO_64 }, 512 }; 513 514 /* tszimm encoding produces immediates in the range [1..esize] */ 515 tcg_debug_assert(shift > 0); 516 tcg_debug_assert(shift <= (8 << vece)); 517 518 if (shift == (8 << vece)) { 519 /* 520 * Shifts larger than the element size are architecturally valid. 521 * Unsigned results in zero. With rounding, this produces a 522 * copy of the most significant bit. 523 */ 524 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); 525 } else { 526 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 527 } 528 } 529 530 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 531 { 532 TCGv_i64 t = tcg_temp_new_i64(); 533 534 if (sh == 8) { 535 tcg_gen_vec_shr8i_i64(t, a, 7); 536 } else { 537 gen_urshr8_i64(t, a, sh); 538 } 539 tcg_gen_vec_add8_i64(d, d, t); 540 } 541 542 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 543 { 544 TCGv_i64 t = tcg_temp_new_i64(); 545 546 if (sh == 16) { 547 tcg_gen_vec_shr16i_i64(t, a, 15); 548 } else { 549 gen_urshr16_i64(t, a, sh); 550 } 551 tcg_gen_vec_add16_i64(d, d, t); 552 } 553 554 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 555 { 556 TCGv_i32 t = tcg_temp_new_i32(); 557 558 if (sh == 32) { 559 tcg_gen_shri_i32(t, a, 31); 560 } else { 561 gen_urshr32_i32(t, a, sh); 562 } 563 tcg_gen_add_i32(d, d, t); 564 } 565 566 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 567 { 568 TCGv_i64 t = tcg_temp_new_i64(); 569 570 if (sh == 64) { 571 tcg_gen_shri_i64(t, a, 63); 572 } else { 573 gen_urshr64_i64(t, a, sh); 574 } 575 tcg_gen_add_i64(d, d, t); 576 } 577 578 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 579 { 580 TCGv_vec t = tcg_temp_new_vec_matching(d); 581 582 if (sh == (8 << vece)) { 583 tcg_gen_shri_vec(vece, t, a, sh - 1); 584 } else { 585 gen_urshr_vec(vece, t, a, sh); 586 } 587 tcg_gen_add_vec(vece, d, d, t); 588 } 589 590 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 591 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 592 { 593 static const TCGOpcode vecop_list[] = { 594 INDEX_op_shri_vec, INDEX_op_add_vec, 0 595 }; 596 static const GVecGen2i ops[4] = { 597 { .fni8 = gen_ursra8_i64, 598 .fniv = gen_ursra_vec, 599 .fno = gen_helper_gvec_ursra_b, 600 .opt_opc = vecop_list, 601 .load_dest = true, 602 .vece = MO_8 }, 603 { .fni8 = gen_ursra16_i64, 604 .fniv = gen_ursra_vec, 605 .fno = gen_helper_gvec_ursra_h, 606 .opt_opc = vecop_list, 607 .load_dest = true, 608 .vece = MO_16 }, 609 { .fni4 = gen_ursra32_i32, 610 .fniv = gen_ursra_vec, 611 .fno = gen_helper_gvec_ursra_s, 612 .opt_opc = vecop_list, 613 .load_dest = true, 614 .vece = MO_32 }, 615 { .fni8 = gen_ursra64_i64, 616 .fniv = gen_ursra_vec, 617 .fno = gen_helper_gvec_ursra_d, 618 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 619 .opt_opc = vecop_list, 620 .load_dest = true, 621 .vece = MO_64 }, 622 }; 623 624 /* tszimm encoding produces immediates in the range [1..esize] */ 625 tcg_debug_assert(shift > 0); 626 tcg_debug_assert(shift <= (8 << vece)); 627 628 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 629 } 630 631 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 632 { 633 uint64_t mask = dup_const(MO_8, 0xff >> shift); 634 TCGv_i64 t = tcg_temp_new_i64(); 635 636 tcg_gen_shri_i64(t, a, shift); 637 tcg_gen_andi_i64(t, t, mask); 638 tcg_gen_andi_i64(d, d, ~mask); 639 tcg_gen_or_i64(d, d, t); 640 } 641 642 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 643 { 644 uint64_t mask = dup_const(MO_16, 0xffff >> shift); 645 TCGv_i64 t = tcg_temp_new_i64(); 646 647 tcg_gen_shri_i64(t, a, shift); 648 tcg_gen_andi_i64(t, t, mask); 649 tcg_gen_andi_i64(d, d, ~mask); 650 tcg_gen_or_i64(d, d, t); 651 } 652 653 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 654 { 655 tcg_gen_shri_i32(a, a, shift); 656 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); 657 } 658 659 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 660 { 661 tcg_gen_shri_i64(a, a, shift); 662 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); 663 } 664 665 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 666 { 667 TCGv_vec t = tcg_temp_new_vec_matching(d); 668 TCGv_vec m = tcg_temp_new_vec_matching(d); 669 670 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); 671 tcg_gen_shri_vec(vece, t, a, sh); 672 tcg_gen_and_vec(vece, d, d, m); 673 tcg_gen_or_vec(vece, d, d, t); 674 } 675 676 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 677 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 678 { 679 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; 680 const GVecGen2i ops[4] = { 681 { .fni8 = gen_shr8_ins_i64, 682 .fniv = gen_shr_ins_vec, 683 .fno = gen_helper_gvec_sri_b, 684 .load_dest = true, 685 .opt_opc = vecop_list, 686 .vece = MO_8 }, 687 { .fni8 = gen_shr16_ins_i64, 688 .fniv = gen_shr_ins_vec, 689 .fno = gen_helper_gvec_sri_h, 690 .load_dest = true, 691 .opt_opc = vecop_list, 692 .vece = MO_16 }, 693 { .fni4 = gen_shr32_ins_i32, 694 .fniv = gen_shr_ins_vec, 695 .fno = gen_helper_gvec_sri_s, 696 .load_dest = true, 697 .opt_opc = vecop_list, 698 .vece = MO_32 }, 699 { .fni8 = gen_shr64_ins_i64, 700 .fniv = gen_shr_ins_vec, 701 .fno = gen_helper_gvec_sri_d, 702 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 703 .load_dest = true, 704 .opt_opc = vecop_list, 705 .vece = MO_64 }, 706 }; 707 708 /* tszimm encoding produces immediates in the range [1..esize]. */ 709 tcg_debug_assert(shift > 0); 710 tcg_debug_assert(shift <= (8 << vece)); 711 712 /* Shift of esize leaves destination unchanged. */ 713 if (shift < (8 << vece)) { 714 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 715 } else { 716 /* Nop, but we do need to clear the tail. */ 717 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 718 } 719 } 720 721 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 722 { 723 uint64_t mask = dup_const(MO_8, 0xff << shift); 724 TCGv_i64 t = tcg_temp_new_i64(); 725 726 tcg_gen_shli_i64(t, a, shift); 727 tcg_gen_andi_i64(t, t, mask); 728 tcg_gen_andi_i64(d, d, ~mask); 729 tcg_gen_or_i64(d, d, t); 730 } 731 732 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 733 { 734 uint64_t mask = dup_const(MO_16, 0xffff << shift); 735 TCGv_i64 t = tcg_temp_new_i64(); 736 737 tcg_gen_shli_i64(t, a, shift); 738 tcg_gen_andi_i64(t, t, mask); 739 tcg_gen_andi_i64(d, d, ~mask); 740 tcg_gen_or_i64(d, d, t); 741 } 742 743 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 744 { 745 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); 746 } 747 748 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 749 { 750 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); 751 } 752 753 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 754 { 755 TCGv_vec t = tcg_temp_new_vec_matching(d); 756 TCGv_vec m = tcg_temp_new_vec_matching(d); 757 758 tcg_gen_shli_vec(vece, t, a, sh); 759 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); 760 tcg_gen_and_vec(vece, d, d, m); 761 tcg_gen_or_vec(vece, d, d, t); 762 } 763 764 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 765 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 766 { 767 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; 768 const GVecGen2i ops[4] = { 769 { .fni8 = gen_shl8_ins_i64, 770 .fniv = gen_shl_ins_vec, 771 .fno = gen_helper_gvec_sli_b, 772 .load_dest = true, 773 .opt_opc = vecop_list, 774 .vece = MO_8 }, 775 { .fni8 = gen_shl16_ins_i64, 776 .fniv = gen_shl_ins_vec, 777 .fno = gen_helper_gvec_sli_h, 778 .load_dest = true, 779 .opt_opc = vecop_list, 780 .vece = MO_16 }, 781 { .fni4 = gen_shl32_ins_i32, 782 .fniv = gen_shl_ins_vec, 783 .fno = gen_helper_gvec_sli_s, 784 .load_dest = true, 785 .opt_opc = vecop_list, 786 .vece = MO_32 }, 787 { .fni8 = gen_shl64_ins_i64, 788 .fniv = gen_shl_ins_vec, 789 .fno = gen_helper_gvec_sli_d, 790 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 791 .load_dest = true, 792 .opt_opc = vecop_list, 793 .vece = MO_64 }, 794 }; 795 796 /* tszimm encoding produces immediates in the range [0..esize-1]. */ 797 tcg_debug_assert(shift >= 0); 798 tcg_debug_assert(shift < (8 << vece)); 799 800 if (shift == 0) { 801 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz); 802 } else { 803 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 804 } 805 } 806 807 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 808 { 809 gen_helper_neon_mul_u8(a, a, b); 810 gen_helper_neon_add_u8(d, d, a); 811 } 812 813 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 814 { 815 gen_helper_neon_mul_u8(a, a, b); 816 gen_helper_neon_sub_u8(d, d, a); 817 } 818 819 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 820 { 821 gen_helper_neon_mul_u16(a, a, b); 822 gen_helper_neon_add_u16(d, d, a); 823 } 824 825 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 826 { 827 gen_helper_neon_mul_u16(a, a, b); 828 gen_helper_neon_sub_u16(d, d, a); 829 } 830 831 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 832 { 833 tcg_gen_mul_i32(a, a, b); 834 tcg_gen_add_i32(d, d, a); 835 } 836 837 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 838 { 839 tcg_gen_mul_i32(a, a, b); 840 tcg_gen_sub_i32(d, d, a); 841 } 842 843 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 844 { 845 tcg_gen_mul_i64(a, a, b); 846 tcg_gen_add_i64(d, d, a); 847 } 848 849 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 850 { 851 tcg_gen_mul_i64(a, a, b); 852 tcg_gen_sub_i64(d, d, a); 853 } 854 855 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 856 { 857 tcg_gen_mul_vec(vece, a, a, b); 858 tcg_gen_add_vec(vece, d, d, a); 859 } 860 861 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 862 { 863 tcg_gen_mul_vec(vece, a, a, b); 864 tcg_gen_sub_vec(vece, d, d, a); 865 } 866 867 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops, 868 * these tables are shared with AArch64 which does support them. 869 */ 870 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 871 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 872 { 873 static const TCGOpcode vecop_list[] = { 874 INDEX_op_mul_vec, INDEX_op_add_vec, 0 875 }; 876 static const GVecGen3 ops[4] = { 877 { .fni4 = gen_mla8_i32, 878 .fniv = gen_mla_vec, 879 .load_dest = true, 880 .opt_opc = vecop_list, 881 .vece = MO_8 }, 882 { .fni4 = gen_mla16_i32, 883 .fniv = gen_mla_vec, 884 .load_dest = true, 885 .opt_opc = vecop_list, 886 .vece = MO_16 }, 887 { .fni4 = gen_mla32_i32, 888 .fniv = gen_mla_vec, 889 .load_dest = true, 890 .opt_opc = vecop_list, 891 .vece = MO_32 }, 892 { .fni8 = gen_mla64_i64, 893 .fniv = gen_mla_vec, 894 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 895 .load_dest = true, 896 .opt_opc = vecop_list, 897 .vece = MO_64 }, 898 }; 899 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 900 } 901 902 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 903 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 904 { 905 static const TCGOpcode vecop_list[] = { 906 INDEX_op_mul_vec, INDEX_op_sub_vec, 0 907 }; 908 static const GVecGen3 ops[4] = { 909 { .fni4 = gen_mls8_i32, 910 .fniv = gen_mls_vec, 911 .load_dest = true, 912 .opt_opc = vecop_list, 913 .vece = MO_8 }, 914 { .fni4 = gen_mls16_i32, 915 .fniv = gen_mls_vec, 916 .load_dest = true, 917 .opt_opc = vecop_list, 918 .vece = MO_16 }, 919 { .fni4 = gen_mls32_i32, 920 .fniv = gen_mls_vec, 921 .load_dest = true, 922 .opt_opc = vecop_list, 923 .vece = MO_32 }, 924 { .fni8 = gen_mls64_i64, 925 .fniv = gen_mls_vec, 926 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 927 .load_dest = true, 928 .opt_opc = vecop_list, 929 .vece = MO_64 }, 930 }; 931 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 932 } 933 934 /* CMTST : test is "if (X & Y != 0)". */ 935 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 936 { 937 tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b); 938 } 939 940 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 941 { 942 tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b); 943 } 944 945 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 946 { 947 tcg_gen_and_vec(vece, d, a, b); 948 tcg_gen_dupi_vec(vece, a, 0); 949 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); 950 } 951 952 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 953 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 954 { 955 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; 956 static const GVecGen3 ops[4] = { 957 { .fni4 = gen_helper_neon_tst_u8, 958 .fniv = gen_cmtst_vec, 959 .opt_opc = vecop_list, 960 .vece = MO_8 }, 961 { .fni4 = gen_helper_neon_tst_u16, 962 .fniv = gen_cmtst_vec, 963 .opt_opc = vecop_list, 964 .vece = MO_16 }, 965 { .fni4 = gen_cmtst_i32, 966 .fniv = gen_cmtst_vec, 967 .opt_opc = vecop_list, 968 .vece = MO_32 }, 969 { .fni8 = gen_cmtst_i64, 970 .fniv = gen_cmtst_vec, 971 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 972 .opt_opc = vecop_list, 973 .vece = MO_64 }, 974 }; 975 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 976 } 977 978 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 979 { 980 TCGv_i32 lval = tcg_temp_new_i32(); 981 TCGv_i32 rval = tcg_temp_new_i32(); 982 TCGv_i32 lsh = tcg_temp_new_i32(); 983 TCGv_i32 rsh = tcg_temp_new_i32(); 984 TCGv_i32 zero = tcg_constant_i32(0); 985 TCGv_i32 max = tcg_constant_i32(32); 986 987 /* 988 * Rely on the TCG guarantee that out of range shifts produce 989 * unspecified results, not undefined behaviour (i.e. no trap). 990 * Discard out-of-range results after the fact. 991 */ 992 tcg_gen_ext8s_i32(lsh, shift); 993 tcg_gen_neg_i32(rsh, lsh); 994 tcg_gen_shl_i32(lval, src, lsh); 995 tcg_gen_shr_i32(rval, src, rsh); 996 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); 997 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); 998 } 999 1000 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 1001 { 1002 TCGv_i64 lval = tcg_temp_new_i64(); 1003 TCGv_i64 rval = tcg_temp_new_i64(); 1004 TCGv_i64 lsh = tcg_temp_new_i64(); 1005 TCGv_i64 rsh = tcg_temp_new_i64(); 1006 TCGv_i64 zero = tcg_constant_i64(0); 1007 TCGv_i64 max = tcg_constant_i64(64); 1008 1009 /* 1010 * Rely on the TCG guarantee that out of range shifts produce 1011 * unspecified results, not undefined behaviour (i.e. no trap). 1012 * Discard out-of-range results after the fact. 1013 */ 1014 tcg_gen_ext8s_i64(lsh, shift); 1015 tcg_gen_neg_i64(rsh, lsh); 1016 tcg_gen_shl_i64(lval, src, lsh); 1017 tcg_gen_shr_i64(rval, src, rsh); 1018 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); 1019 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); 1020 } 1021 1022 static void gen_ushl_vec(unsigned vece, TCGv_vec dst, 1023 TCGv_vec src, TCGv_vec shift) 1024 { 1025 TCGv_vec lval = tcg_temp_new_vec_matching(dst); 1026 TCGv_vec rval = tcg_temp_new_vec_matching(dst); 1027 TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 1028 TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 1029 TCGv_vec msk, max; 1030 1031 tcg_gen_neg_vec(vece, rsh, shift); 1032 if (vece == MO_8) { 1033 tcg_gen_mov_vec(lsh, shift); 1034 } else { 1035 msk = tcg_temp_new_vec_matching(dst); 1036 tcg_gen_dupi_vec(vece, msk, 0xff); 1037 tcg_gen_and_vec(vece, lsh, shift, msk); 1038 tcg_gen_and_vec(vece, rsh, rsh, msk); 1039 } 1040 1041 /* 1042 * Rely on the TCG guarantee that out of range shifts produce 1043 * unspecified results, not undefined behaviour (i.e. no trap). 1044 * Discard out-of-range results after the fact. 1045 */ 1046 tcg_gen_shlv_vec(vece, lval, src, lsh); 1047 tcg_gen_shrv_vec(vece, rval, src, rsh); 1048 1049 max = tcg_temp_new_vec_matching(dst); 1050 tcg_gen_dupi_vec(vece, max, 8 << vece); 1051 1052 /* 1053 * The choice of LT (signed) and GEU (unsigned) are biased toward 1054 * the instructions of the x86_64 host. For MO_8, the whole byte 1055 * is significant so we must use an unsigned compare; otherwise we 1056 * have already masked to a byte and so a signed compare works. 1057 * Other tcg hosts have a full set of comparisons and do not care. 1058 */ 1059 if (vece == MO_8) { 1060 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); 1061 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); 1062 tcg_gen_andc_vec(vece, lval, lval, lsh); 1063 tcg_gen_andc_vec(vece, rval, rval, rsh); 1064 } else { 1065 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); 1066 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); 1067 tcg_gen_and_vec(vece, lval, lval, lsh); 1068 tcg_gen_and_vec(vece, rval, rval, rsh); 1069 } 1070 tcg_gen_or_vec(vece, dst, lval, rval); 1071 } 1072 1073 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1074 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1075 { 1076 static const TCGOpcode vecop_list[] = { 1077 INDEX_op_neg_vec, INDEX_op_shlv_vec, 1078 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 1079 }; 1080 static const GVecGen3 ops[4] = { 1081 { .fniv = gen_ushl_vec, 1082 .fno = gen_helper_gvec_ushl_b, 1083 .opt_opc = vecop_list, 1084 .vece = MO_8 }, 1085 { .fniv = gen_ushl_vec, 1086 .fno = gen_helper_gvec_ushl_h, 1087 .opt_opc = vecop_list, 1088 .vece = MO_16 }, 1089 { .fni4 = gen_ushl_i32, 1090 .fniv = gen_ushl_vec, 1091 .opt_opc = vecop_list, 1092 .vece = MO_32 }, 1093 { .fni8 = gen_ushl_i64, 1094 .fniv = gen_ushl_vec, 1095 .opt_opc = vecop_list, 1096 .vece = MO_64 }, 1097 }; 1098 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1099 } 1100 1101 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 1102 { 1103 TCGv_i32 lval = tcg_temp_new_i32(); 1104 TCGv_i32 rval = tcg_temp_new_i32(); 1105 TCGv_i32 lsh = tcg_temp_new_i32(); 1106 TCGv_i32 rsh = tcg_temp_new_i32(); 1107 TCGv_i32 zero = tcg_constant_i32(0); 1108 TCGv_i32 max = tcg_constant_i32(31); 1109 1110 /* 1111 * Rely on the TCG guarantee that out of range shifts produce 1112 * unspecified results, not undefined behaviour (i.e. no trap). 1113 * Discard out-of-range results after the fact. 1114 */ 1115 tcg_gen_ext8s_i32(lsh, shift); 1116 tcg_gen_neg_i32(rsh, lsh); 1117 tcg_gen_shl_i32(lval, src, lsh); 1118 tcg_gen_umin_i32(rsh, rsh, max); 1119 tcg_gen_sar_i32(rval, src, rsh); 1120 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); 1121 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); 1122 } 1123 1124 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 1125 { 1126 TCGv_i64 lval = tcg_temp_new_i64(); 1127 TCGv_i64 rval = tcg_temp_new_i64(); 1128 TCGv_i64 lsh = tcg_temp_new_i64(); 1129 TCGv_i64 rsh = tcg_temp_new_i64(); 1130 TCGv_i64 zero = tcg_constant_i64(0); 1131 TCGv_i64 max = tcg_constant_i64(63); 1132 1133 /* 1134 * Rely on the TCG guarantee that out of range shifts produce 1135 * unspecified results, not undefined behaviour (i.e. no trap). 1136 * Discard out-of-range results after the fact. 1137 */ 1138 tcg_gen_ext8s_i64(lsh, shift); 1139 tcg_gen_neg_i64(rsh, lsh); 1140 tcg_gen_shl_i64(lval, src, lsh); 1141 tcg_gen_umin_i64(rsh, rsh, max); 1142 tcg_gen_sar_i64(rval, src, rsh); 1143 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); 1144 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); 1145 } 1146 1147 static void gen_sshl_vec(unsigned vece, TCGv_vec dst, 1148 TCGv_vec src, TCGv_vec shift) 1149 { 1150 TCGv_vec lval = tcg_temp_new_vec_matching(dst); 1151 TCGv_vec rval = tcg_temp_new_vec_matching(dst); 1152 TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 1153 TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 1154 TCGv_vec tmp = tcg_temp_new_vec_matching(dst); 1155 1156 /* 1157 * Rely on the TCG guarantee that out of range shifts produce 1158 * unspecified results, not undefined behaviour (i.e. no trap). 1159 * Discard out-of-range results after the fact. 1160 */ 1161 tcg_gen_neg_vec(vece, rsh, shift); 1162 if (vece == MO_8) { 1163 tcg_gen_mov_vec(lsh, shift); 1164 } else { 1165 tcg_gen_dupi_vec(vece, tmp, 0xff); 1166 tcg_gen_and_vec(vece, lsh, shift, tmp); 1167 tcg_gen_and_vec(vece, rsh, rsh, tmp); 1168 } 1169 1170 /* Bound rsh so out of bound right shift gets -1. */ 1171 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); 1172 tcg_gen_umin_vec(vece, rsh, rsh, tmp); 1173 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); 1174 1175 tcg_gen_shlv_vec(vece, lval, src, lsh); 1176 tcg_gen_sarv_vec(vece, rval, src, rsh); 1177 1178 /* Select in-bound left shift. */ 1179 tcg_gen_andc_vec(vece, lval, lval, tmp); 1180 1181 /* Select between left and right shift. */ 1182 if (vece == MO_8) { 1183 tcg_gen_dupi_vec(vece, tmp, 0); 1184 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); 1185 } else { 1186 tcg_gen_dupi_vec(vece, tmp, 0x80); 1187 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); 1188 } 1189 } 1190 1191 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1192 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1193 { 1194 static const TCGOpcode vecop_list[] = { 1195 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, 1196 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 1197 }; 1198 static const GVecGen3 ops[4] = { 1199 { .fniv = gen_sshl_vec, 1200 .fno = gen_helper_gvec_sshl_b, 1201 .opt_opc = vecop_list, 1202 .vece = MO_8 }, 1203 { .fniv = gen_sshl_vec, 1204 .fno = gen_helper_gvec_sshl_h, 1205 .opt_opc = vecop_list, 1206 .vece = MO_16 }, 1207 { .fni4 = gen_sshl_i32, 1208 .fniv = gen_sshl_vec, 1209 .opt_opc = vecop_list, 1210 .vece = MO_32 }, 1211 { .fni8 = gen_sshl_i64, 1212 .fniv = gen_sshl_vec, 1213 .opt_opc = vecop_list, 1214 .vece = MO_64 }, 1215 }; 1216 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1217 } 1218 1219 void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1220 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1221 { 1222 static gen_helper_gvec_3 * const fns[] = { 1223 gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h, 1224 gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d, 1225 }; 1226 tcg_debug_assert(vece <= MO_64); 1227 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1228 } 1229 1230 void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1231 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1232 { 1233 static gen_helper_gvec_3 * const fns[] = { 1234 gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h, 1235 gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d, 1236 }; 1237 tcg_debug_assert(vece <= MO_64); 1238 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1239 } 1240 1241 void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1242 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1243 { 1244 static gen_helper_gvec_3_ptr * const fns[] = { 1245 gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h, 1246 gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d, 1247 }; 1248 tcg_debug_assert(vece <= MO_64); 1249 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1250 opr_sz, max_sz, 0, fns[vece]); 1251 } 1252 1253 void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1254 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1255 { 1256 static gen_helper_gvec_3_ptr * const fns[] = { 1257 gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h, 1258 gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d, 1259 }; 1260 tcg_debug_assert(vece <= MO_64); 1261 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1262 opr_sz, max_sz, 0, fns[vece]); 1263 } 1264 1265 void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1266 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1267 { 1268 static gen_helper_gvec_3_ptr * const fns[] = { 1269 gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h, 1270 gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d, 1271 }; 1272 tcg_debug_assert(vece <= MO_64); 1273 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1274 opr_sz, max_sz, 0, fns[vece]); 1275 } 1276 1277 void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1278 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1279 { 1280 static gen_helper_gvec_3_ptr * const fns[] = { 1281 gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h, 1282 gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d, 1283 }; 1284 tcg_debug_assert(vece <= MO_64); 1285 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1286 opr_sz, max_sz, 0, fns[vece]); 1287 } 1288 1289 void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1290 { 1291 uint64_t max = MAKE_64BIT_MASK(0, 8 << esz); 1292 TCGv_i64 tmp = tcg_temp_new_i64(); 1293 1294 tcg_gen_add_i64(tmp, a, b); 1295 tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max)); 1296 tcg_gen_xor_i64(tmp, tmp, res); 1297 tcg_gen_or_i64(qc, qc, tmp); 1298 } 1299 1300 void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1301 { 1302 TCGv_i64 t = tcg_temp_new_i64(); 1303 1304 tcg_gen_add_i64(t, a, b); 1305 tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a, 1306 tcg_constant_i64(UINT64_MAX), t); 1307 tcg_gen_xor_i64(t, t, res); 1308 tcg_gen_or_i64(qc, qc, t); 1309 } 1310 1311 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 1312 TCGv_vec a, TCGv_vec b) 1313 { 1314 TCGv_vec x = tcg_temp_new_vec_matching(t); 1315 tcg_gen_add_vec(vece, x, a, b); 1316 tcg_gen_usadd_vec(vece, t, a, b); 1317 tcg_gen_xor_vec(vece, x, x, t); 1318 tcg_gen_or_vec(vece, qc, qc, x); 1319 } 1320 1321 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1322 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1323 { 1324 static const TCGOpcode vecop_list[] = { 1325 INDEX_op_usadd_vec, INDEX_op_add_vec, 0 1326 }; 1327 static const GVecGen4 ops[4] = { 1328 { .fniv = gen_uqadd_vec, 1329 .fno = gen_helper_gvec_uqadd_b, 1330 .write_aofs = true, 1331 .opt_opc = vecop_list, 1332 .vece = MO_8 }, 1333 { .fniv = gen_uqadd_vec, 1334 .fno = gen_helper_gvec_uqadd_h, 1335 .write_aofs = true, 1336 .opt_opc = vecop_list, 1337 .vece = MO_16 }, 1338 { .fniv = gen_uqadd_vec, 1339 .fno = gen_helper_gvec_uqadd_s, 1340 .write_aofs = true, 1341 .opt_opc = vecop_list, 1342 .vece = MO_32 }, 1343 { .fniv = gen_uqadd_vec, 1344 .fni8 = gen_uqadd_d, 1345 .fno = gen_helper_gvec_uqadd_d, 1346 .write_aofs = true, 1347 .opt_opc = vecop_list, 1348 .vece = MO_64 }, 1349 }; 1350 1351 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 1352 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 1353 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1354 } 1355 1356 void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1357 { 1358 int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1359 int64_t min = -1ll - max; 1360 TCGv_i64 tmp = tcg_temp_new_i64(); 1361 1362 tcg_gen_add_i64(tmp, a, b); 1363 tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1364 tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1365 tcg_gen_xor_i64(tmp, tmp, res); 1366 tcg_gen_or_i64(qc, qc, tmp); 1367 } 1368 1369 void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1370 { 1371 TCGv_i64 t0 = tcg_temp_new_i64(); 1372 TCGv_i64 t1 = tcg_temp_new_i64(); 1373 TCGv_i64 t2 = tcg_temp_new_i64(); 1374 1375 tcg_gen_add_i64(t0, a, b); 1376 1377 /* Compute signed overflow indication into T1 */ 1378 tcg_gen_xor_i64(t1, a, b); 1379 tcg_gen_xor_i64(t2, t0, a); 1380 tcg_gen_andc_i64(t1, t2, t1); 1381 1382 /* Compute saturated value into T2 */ 1383 tcg_gen_sari_i64(t2, a, 63); 1384 tcg_gen_xori_i64(t2, t2, INT64_MAX); 1385 1386 tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1387 tcg_gen_xor_i64(t0, t0, res); 1388 tcg_gen_or_i64(qc, qc, t0); 1389 } 1390 1391 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 1392 TCGv_vec a, TCGv_vec b) 1393 { 1394 TCGv_vec x = tcg_temp_new_vec_matching(t); 1395 tcg_gen_add_vec(vece, x, a, b); 1396 tcg_gen_ssadd_vec(vece, t, a, b); 1397 tcg_gen_xor_vec(vece, x, x, t); 1398 tcg_gen_or_vec(vece, qc, qc, x); 1399 } 1400 1401 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1402 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1403 { 1404 static const TCGOpcode vecop_list[] = { 1405 INDEX_op_ssadd_vec, INDEX_op_add_vec, 0 1406 }; 1407 static const GVecGen4 ops[4] = { 1408 { .fniv = gen_sqadd_vec, 1409 .fno = gen_helper_gvec_sqadd_b, 1410 .opt_opc = vecop_list, 1411 .write_aofs = true, 1412 .vece = MO_8 }, 1413 { .fniv = gen_sqadd_vec, 1414 .fno = gen_helper_gvec_sqadd_h, 1415 .opt_opc = vecop_list, 1416 .write_aofs = true, 1417 .vece = MO_16 }, 1418 { .fniv = gen_sqadd_vec, 1419 .fno = gen_helper_gvec_sqadd_s, 1420 .opt_opc = vecop_list, 1421 .write_aofs = true, 1422 .vece = MO_32 }, 1423 { .fniv = gen_sqadd_vec, 1424 .fni8 = gen_sqadd_d, 1425 .fno = gen_helper_gvec_sqadd_d, 1426 .opt_opc = vecop_list, 1427 .write_aofs = true, 1428 .vece = MO_64 }, 1429 }; 1430 1431 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 1432 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 1433 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1434 } 1435 1436 void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1437 { 1438 TCGv_i64 tmp = tcg_temp_new_i64(); 1439 1440 tcg_gen_sub_i64(tmp, a, b); 1441 tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0)); 1442 tcg_gen_xor_i64(tmp, tmp, res); 1443 tcg_gen_or_i64(qc, qc, tmp); 1444 } 1445 1446 void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1447 { 1448 TCGv_i64 t = tcg_temp_new_i64(); 1449 1450 tcg_gen_sub_i64(t, a, b); 1451 tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t); 1452 tcg_gen_xor_i64(t, t, res); 1453 tcg_gen_or_i64(qc, qc, t); 1454 } 1455 1456 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 1457 TCGv_vec a, TCGv_vec b) 1458 { 1459 TCGv_vec x = tcg_temp_new_vec_matching(t); 1460 tcg_gen_sub_vec(vece, x, a, b); 1461 tcg_gen_ussub_vec(vece, t, a, b); 1462 tcg_gen_xor_vec(vece, x, x, t); 1463 tcg_gen_or_vec(vece, qc, qc, x); 1464 } 1465 1466 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1467 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1468 { 1469 static const TCGOpcode vecop_list[] = { 1470 INDEX_op_ussub_vec, INDEX_op_sub_vec, 0 1471 }; 1472 static const GVecGen4 ops[4] = { 1473 { .fniv = gen_uqsub_vec, 1474 .fno = gen_helper_gvec_uqsub_b, 1475 .opt_opc = vecop_list, 1476 .write_aofs = true, 1477 .vece = MO_8 }, 1478 { .fniv = gen_uqsub_vec, 1479 .fno = gen_helper_gvec_uqsub_h, 1480 .opt_opc = vecop_list, 1481 .write_aofs = true, 1482 .vece = MO_16 }, 1483 { .fniv = gen_uqsub_vec, 1484 .fno = gen_helper_gvec_uqsub_s, 1485 .opt_opc = vecop_list, 1486 .write_aofs = true, 1487 .vece = MO_32 }, 1488 { .fniv = gen_uqsub_vec, 1489 .fni8 = gen_uqsub_d, 1490 .fno = gen_helper_gvec_uqsub_d, 1491 .opt_opc = vecop_list, 1492 .write_aofs = true, 1493 .vece = MO_64 }, 1494 }; 1495 1496 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 1497 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 1498 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1499 } 1500 1501 void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1502 { 1503 int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1504 int64_t min = -1ll - max; 1505 TCGv_i64 tmp = tcg_temp_new_i64(); 1506 1507 tcg_gen_sub_i64(tmp, a, b); 1508 tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1509 tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1510 tcg_gen_xor_i64(tmp, tmp, res); 1511 tcg_gen_or_i64(qc, qc, tmp); 1512 } 1513 1514 void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1515 { 1516 TCGv_i64 t0 = tcg_temp_new_i64(); 1517 TCGv_i64 t1 = tcg_temp_new_i64(); 1518 TCGv_i64 t2 = tcg_temp_new_i64(); 1519 1520 tcg_gen_sub_i64(t0, a, b); 1521 1522 /* Compute signed overflow indication into T1 */ 1523 tcg_gen_xor_i64(t1, a, b); 1524 tcg_gen_xor_i64(t2, t0, a); 1525 tcg_gen_and_i64(t1, t1, t2); 1526 1527 /* Compute saturated value into T2 */ 1528 tcg_gen_sari_i64(t2, a, 63); 1529 tcg_gen_xori_i64(t2, t2, INT64_MAX); 1530 1531 tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1532 tcg_gen_xor_i64(t0, t0, res); 1533 tcg_gen_or_i64(qc, qc, t0); 1534 } 1535 1536 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 1537 TCGv_vec a, TCGv_vec b) 1538 { 1539 TCGv_vec x = tcg_temp_new_vec_matching(t); 1540 tcg_gen_sub_vec(vece, x, a, b); 1541 tcg_gen_sssub_vec(vece, t, a, b); 1542 tcg_gen_xor_vec(vece, x, x, t); 1543 tcg_gen_or_vec(vece, qc, qc, x); 1544 } 1545 1546 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1547 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1548 { 1549 static const TCGOpcode vecop_list[] = { 1550 INDEX_op_sssub_vec, INDEX_op_sub_vec, 0 1551 }; 1552 static const GVecGen4 ops[4] = { 1553 { .fniv = gen_sqsub_vec, 1554 .fno = gen_helper_gvec_sqsub_b, 1555 .opt_opc = vecop_list, 1556 .write_aofs = true, 1557 .vece = MO_8 }, 1558 { .fniv = gen_sqsub_vec, 1559 .fno = gen_helper_gvec_sqsub_h, 1560 .opt_opc = vecop_list, 1561 .write_aofs = true, 1562 .vece = MO_16 }, 1563 { .fniv = gen_sqsub_vec, 1564 .fno = gen_helper_gvec_sqsub_s, 1565 .opt_opc = vecop_list, 1566 .write_aofs = true, 1567 .vece = MO_32 }, 1568 { .fniv = gen_sqsub_vec, 1569 .fni8 = gen_sqsub_d, 1570 .fno = gen_helper_gvec_sqsub_d, 1571 .opt_opc = vecop_list, 1572 .write_aofs = true, 1573 .vece = MO_64 }, 1574 }; 1575 1576 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 1577 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 1578 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1579 } 1580 1581 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1582 { 1583 TCGv_i32 t = tcg_temp_new_i32(); 1584 1585 tcg_gen_sub_i32(t, a, b); 1586 tcg_gen_sub_i32(d, b, a); 1587 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); 1588 } 1589 1590 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1591 { 1592 TCGv_i64 t = tcg_temp_new_i64(); 1593 1594 tcg_gen_sub_i64(t, a, b); 1595 tcg_gen_sub_i64(d, b, a); 1596 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); 1597 } 1598 1599 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1600 { 1601 TCGv_vec t = tcg_temp_new_vec_matching(d); 1602 1603 tcg_gen_smin_vec(vece, t, a, b); 1604 tcg_gen_smax_vec(vece, d, a, b); 1605 tcg_gen_sub_vec(vece, d, d, t); 1606 } 1607 1608 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1609 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1610 { 1611 static const TCGOpcode vecop_list[] = { 1612 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 1613 }; 1614 static const GVecGen3 ops[4] = { 1615 { .fniv = gen_sabd_vec, 1616 .fno = gen_helper_gvec_sabd_b, 1617 .opt_opc = vecop_list, 1618 .vece = MO_8 }, 1619 { .fniv = gen_sabd_vec, 1620 .fno = gen_helper_gvec_sabd_h, 1621 .opt_opc = vecop_list, 1622 .vece = MO_16 }, 1623 { .fni4 = gen_sabd_i32, 1624 .fniv = gen_sabd_vec, 1625 .fno = gen_helper_gvec_sabd_s, 1626 .opt_opc = vecop_list, 1627 .vece = MO_32 }, 1628 { .fni8 = gen_sabd_i64, 1629 .fniv = gen_sabd_vec, 1630 .fno = gen_helper_gvec_sabd_d, 1631 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1632 .opt_opc = vecop_list, 1633 .vece = MO_64 }, 1634 }; 1635 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1636 } 1637 1638 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1639 { 1640 TCGv_i32 t = tcg_temp_new_i32(); 1641 1642 tcg_gen_sub_i32(t, a, b); 1643 tcg_gen_sub_i32(d, b, a); 1644 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); 1645 } 1646 1647 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1648 { 1649 TCGv_i64 t = tcg_temp_new_i64(); 1650 1651 tcg_gen_sub_i64(t, a, b); 1652 tcg_gen_sub_i64(d, b, a); 1653 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); 1654 } 1655 1656 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1657 { 1658 TCGv_vec t = tcg_temp_new_vec_matching(d); 1659 1660 tcg_gen_umin_vec(vece, t, a, b); 1661 tcg_gen_umax_vec(vece, d, a, b); 1662 tcg_gen_sub_vec(vece, d, d, t); 1663 } 1664 1665 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1666 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1667 { 1668 static const TCGOpcode vecop_list[] = { 1669 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 1670 }; 1671 static const GVecGen3 ops[4] = { 1672 { .fniv = gen_uabd_vec, 1673 .fno = gen_helper_gvec_uabd_b, 1674 .opt_opc = vecop_list, 1675 .vece = MO_8 }, 1676 { .fniv = gen_uabd_vec, 1677 .fno = gen_helper_gvec_uabd_h, 1678 .opt_opc = vecop_list, 1679 .vece = MO_16 }, 1680 { .fni4 = gen_uabd_i32, 1681 .fniv = gen_uabd_vec, 1682 .fno = gen_helper_gvec_uabd_s, 1683 .opt_opc = vecop_list, 1684 .vece = MO_32 }, 1685 { .fni8 = gen_uabd_i64, 1686 .fniv = gen_uabd_vec, 1687 .fno = gen_helper_gvec_uabd_d, 1688 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1689 .opt_opc = vecop_list, 1690 .vece = MO_64 }, 1691 }; 1692 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1693 } 1694 1695 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1696 { 1697 TCGv_i32 t = tcg_temp_new_i32(); 1698 gen_sabd_i32(t, a, b); 1699 tcg_gen_add_i32(d, d, t); 1700 } 1701 1702 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1703 { 1704 TCGv_i64 t = tcg_temp_new_i64(); 1705 gen_sabd_i64(t, a, b); 1706 tcg_gen_add_i64(d, d, t); 1707 } 1708 1709 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1710 { 1711 TCGv_vec t = tcg_temp_new_vec_matching(d); 1712 gen_sabd_vec(vece, t, a, b); 1713 tcg_gen_add_vec(vece, d, d, t); 1714 } 1715 1716 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1717 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1718 { 1719 static const TCGOpcode vecop_list[] = { 1720 INDEX_op_sub_vec, INDEX_op_add_vec, 1721 INDEX_op_smin_vec, INDEX_op_smax_vec, 0 1722 }; 1723 static const GVecGen3 ops[4] = { 1724 { .fniv = gen_saba_vec, 1725 .fno = gen_helper_gvec_saba_b, 1726 .opt_opc = vecop_list, 1727 .load_dest = true, 1728 .vece = MO_8 }, 1729 { .fniv = gen_saba_vec, 1730 .fno = gen_helper_gvec_saba_h, 1731 .opt_opc = vecop_list, 1732 .load_dest = true, 1733 .vece = MO_16 }, 1734 { .fni4 = gen_saba_i32, 1735 .fniv = gen_saba_vec, 1736 .fno = gen_helper_gvec_saba_s, 1737 .opt_opc = vecop_list, 1738 .load_dest = true, 1739 .vece = MO_32 }, 1740 { .fni8 = gen_saba_i64, 1741 .fniv = gen_saba_vec, 1742 .fno = gen_helper_gvec_saba_d, 1743 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1744 .opt_opc = vecop_list, 1745 .load_dest = true, 1746 .vece = MO_64 }, 1747 }; 1748 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1749 } 1750 1751 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1752 { 1753 TCGv_i32 t = tcg_temp_new_i32(); 1754 gen_uabd_i32(t, a, b); 1755 tcg_gen_add_i32(d, d, t); 1756 } 1757 1758 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1759 { 1760 TCGv_i64 t = tcg_temp_new_i64(); 1761 gen_uabd_i64(t, a, b); 1762 tcg_gen_add_i64(d, d, t); 1763 } 1764 1765 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1766 { 1767 TCGv_vec t = tcg_temp_new_vec_matching(d); 1768 gen_uabd_vec(vece, t, a, b); 1769 tcg_gen_add_vec(vece, d, d, t); 1770 } 1771 1772 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1773 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1774 { 1775 static const TCGOpcode vecop_list[] = { 1776 INDEX_op_sub_vec, INDEX_op_add_vec, 1777 INDEX_op_umin_vec, INDEX_op_umax_vec, 0 1778 }; 1779 static const GVecGen3 ops[4] = { 1780 { .fniv = gen_uaba_vec, 1781 .fno = gen_helper_gvec_uaba_b, 1782 .opt_opc = vecop_list, 1783 .load_dest = true, 1784 .vece = MO_8 }, 1785 { .fniv = gen_uaba_vec, 1786 .fno = gen_helper_gvec_uaba_h, 1787 .opt_opc = vecop_list, 1788 .load_dest = true, 1789 .vece = MO_16 }, 1790 { .fni4 = gen_uaba_i32, 1791 .fniv = gen_uaba_vec, 1792 .fno = gen_helper_gvec_uaba_s, 1793 .opt_opc = vecop_list, 1794 .load_dest = true, 1795 .vece = MO_32 }, 1796 { .fni8 = gen_uaba_i64, 1797 .fniv = gen_uaba_vec, 1798 .fno = gen_helper_gvec_uaba_d, 1799 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1800 .opt_opc = vecop_list, 1801 .load_dest = true, 1802 .vece = MO_64 }, 1803 }; 1804 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1805 } 1806 1807 void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1808 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1809 { 1810 static gen_helper_gvec_3 * const fns[4] = { 1811 gen_helper_gvec_addp_b, 1812 gen_helper_gvec_addp_h, 1813 gen_helper_gvec_addp_s, 1814 gen_helper_gvec_addp_d, 1815 }; 1816 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1817 } 1818 1819 void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1820 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1821 { 1822 static gen_helper_gvec_3 * const fns[4] = { 1823 gen_helper_gvec_smaxp_b, 1824 gen_helper_gvec_smaxp_h, 1825 gen_helper_gvec_smaxp_s, 1826 }; 1827 tcg_debug_assert(vece <= MO_32); 1828 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1829 } 1830 1831 void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1832 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1833 { 1834 static gen_helper_gvec_3 * const fns[4] = { 1835 gen_helper_gvec_sminp_b, 1836 gen_helper_gvec_sminp_h, 1837 gen_helper_gvec_sminp_s, 1838 }; 1839 tcg_debug_assert(vece <= MO_32); 1840 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1841 } 1842 1843 void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1844 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1845 { 1846 static gen_helper_gvec_3 * const fns[4] = { 1847 gen_helper_gvec_umaxp_b, 1848 gen_helper_gvec_umaxp_h, 1849 gen_helper_gvec_umaxp_s, 1850 }; 1851 tcg_debug_assert(vece <= MO_32); 1852 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1853 } 1854 1855 void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1856 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1857 { 1858 static gen_helper_gvec_3 * const fns[4] = { 1859 gen_helper_gvec_uminp_b, 1860 gen_helper_gvec_uminp_h, 1861 gen_helper_gvec_uminp_s, 1862 }; 1863 tcg_debug_assert(vece <= MO_32); 1864 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1865 } 1866