1 /* 2 * ARM generic vector expansion 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2005-2007 CodeSourcery 6 * Copyright (c) 2007 OpenedHand, Ltd. 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 20 */ 21 22 #include "qemu/osdep.h" 23 #include "translate.h" 24 25 26 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, 27 uint32_t opr_sz, uint32_t max_sz, 28 gen_helper_gvec_3_ptr *fn) 29 { 30 TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 31 32 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 33 tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 34 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, 35 opr_sz, max_sz, 0, fn); 36 } 37 38 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 39 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 40 { 41 static gen_helper_gvec_3_ptr * const fns[2] = { 42 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 43 }; 44 tcg_debug_assert(vece >= 1 && vece <= 2); 45 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 46 } 47 48 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 49 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 50 { 51 static gen_helper_gvec_3_ptr * const fns[2] = { 52 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 53 }; 54 tcg_debug_assert(vece >= 1 && vece <= 2); 55 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 56 } 57 58 #define GEN_CMP0(NAME, COND) \ 59 void NAME(unsigned vece, uint32_t d, uint32_t m, \ 60 uint32_t opr_sz, uint32_t max_sz) \ 61 { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } 62 63 GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) 64 GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) 65 GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) 66 GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) 67 GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) 68 69 #undef GEN_CMP0 70 71 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 72 { 73 tcg_gen_vec_sar8i_i64(a, a, shift); 74 tcg_gen_vec_add8_i64(d, d, a); 75 } 76 77 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 78 { 79 tcg_gen_vec_sar16i_i64(a, a, shift); 80 tcg_gen_vec_add16_i64(d, d, a); 81 } 82 83 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 84 { 85 tcg_gen_sari_i32(a, a, shift); 86 tcg_gen_add_i32(d, d, a); 87 } 88 89 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 90 { 91 tcg_gen_sari_i64(a, a, shift); 92 tcg_gen_add_i64(d, d, a); 93 } 94 95 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 96 { 97 tcg_gen_sari_vec(vece, a, a, sh); 98 tcg_gen_add_vec(vece, d, d, a); 99 } 100 101 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 102 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 103 { 104 static const TCGOpcode vecop_list[] = { 105 INDEX_op_sari_vec, INDEX_op_add_vec, 0 106 }; 107 static const GVecGen2i ops[4] = 
{ 108 { .fni8 = gen_ssra8_i64, 109 .fniv = gen_ssra_vec, 110 .fno = gen_helper_gvec_ssra_b, 111 .load_dest = true, 112 .opt_opc = vecop_list, 113 .vece = MO_8 }, 114 { .fni8 = gen_ssra16_i64, 115 .fniv = gen_ssra_vec, 116 .fno = gen_helper_gvec_ssra_h, 117 .load_dest = true, 118 .opt_opc = vecop_list, 119 .vece = MO_16 }, 120 { .fni4 = gen_ssra32_i32, 121 .fniv = gen_ssra_vec, 122 .fno = gen_helper_gvec_ssra_s, 123 .load_dest = true, 124 .opt_opc = vecop_list, 125 .vece = MO_32 }, 126 { .fni8 = gen_ssra64_i64, 127 .fniv = gen_ssra_vec, 128 .fno = gen_helper_gvec_ssra_d, 129 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 130 .opt_opc = vecop_list, 131 .load_dest = true, 132 .vece = MO_64 }, 133 }; 134 135 /* tszimm encoding produces immediates in the range [1..esize]. */ 136 tcg_debug_assert(shift > 0); 137 tcg_debug_assert(shift <= (8 << vece)); 138 139 /* 140 * Shifts larger than the element size are architecturally valid. 141 * Signed results in all sign bits. 142 */ 143 shift = MIN(shift, (8 << vece) - 1); 144 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 145 } 146 147 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 148 { 149 tcg_gen_vec_shr8i_i64(a, a, shift); 150 tcg_gen_vec_add8_i64(d, d, a); 151 } 152 153 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 154 { 155 tcg_gen_vec_shr16i_i64(a, a, shift); 156 tcg_gen_vec_add16_i64(d, d, a); 157 } 158 159 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 160 { 161 tcg_gen_shri_i32(a, a, shift); 162 tcg_gen_add_i32(d, d, a); 163 } 164 165 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 166 { 167 tcg_gen_shri_i64(a, a, shift); 168 tcg_gen_add_i64(d, d, a); 169 } 170 171 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 172 { 173 tcg_gen_shri_vec(vece, a, a, sh); 174 tcg_gen_add_vec(vece, d, d, a); 175 } 176 177 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 178 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 179 { 180 static const TCGOpcode vecop_list[] = { 181 INDEX_op_shri_vec, INDEX_op_add_vec, 0 182 }; 183 static const GVecGen2i ops[4] = { 184 { .fni8 = gen_usra8_i64, 185 .fniv = gen_usra_vec, 186 .fno = gen_helper_gvec_usra_b, 187 .load_dest = true, 188 .opt_opc = vecop_list, 189 .vece = MO_8, }, 190 { .fni8 = gen_usra16_i64, 191 .fniv = gen_usra_vec, 192 .fno = gen_helper_gvec_usra_h, 193 .load_dest = true, 194 .opt_opc = vecop_list, 195 .vece = MO_16, }, 196 { .fni4 = gen_usra32_i32, 197 .fniv = gen_usra_vec, 198 .fno = gen_helper_gvec_usra_s, 199 .load_dest = true, 200 .opt_opc = vecop_list, 201 .vece = MO_32, }, 202 { .fni8 = gen_usra64_i64, 203 .fniv = gen_usra_vec, 204 .fno = gen_helper_gvec_usra_d, 205 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 206 .load_dest = true, 207 .opt_opc = vecop_list, 208 .vece = MO_64, }, 209 }; 210 211 /* tszimm encoding produces immediates in the range [1..esize]. */ 212 tcg_debug_assert(shift > 0); 213 tcg_debug_assert(shift <= (8 << vece)); 214 215 /* 216 * Shifts larger than the element size are architecturally valid. 217 * Unsigned results in all zeros as input to accumulate: nop. 218 */ 219 if (shift < (8 << vece)) { 220 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 221 } else { 222 /* Nop, but we do need to clear the tail. */ 223 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 224 } 225 } 226 227 /* 228 * Shift one less than the requested amount, and the low bit is 229 * the rounding bit. 
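 * In other words, the rounded shift (x + (1 << (sh - 1))) >> sh is
 * computed as (x >> sh) + ((x >> (sh - 1)) & 1).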
For the 8 and 16-bit operations, because we 230 * mask the low bit, we can perform a normal integer shift instead 231 * of a vector shift. 232 */ 233 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 234 { 235 TCGv_i64 t = tcg_temp_new_i64(); 236 237 tcg_gen_shri_i64(t, a, sh - 1); 238 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 239 tcg_gen_vec_sar8i_i64(d, a, sh); 240 tcg_gen_vec_add8_i64(d, d, t); 241 } 242 243 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 244 { 245 TCGv_i64 t = tcg_temp_new_i64(); 246 247 tcg_gen_shri_i64(t, a, sh - 1); 248 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 249 tcg_gen_vec_sar16i_i64(d, a, sh); 250 tcg_gen_vec_add16_i64(d, d, t); 251 } 252 253 void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 254 { 255 TCGv_i32 t; 256 257 /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ 258 if (sh == 32) { 259 tcg_gen_movi_i32(d, 0); 260 return; 261 } 262 t = tcg_temp_new_i32(); 263 tcg_gen_extract_i32(t, a, sh - 1, 1); 264 tcg_gen_sari_i32(d, a, sh); 265 tcg_gen_add_i32(d, d, t); 266 } 267 268 void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 269 { 270 TCGv_i64 t = tcg_temp_new_i64(); 271 272 tcg_gen_extract_i64(t, a, sh - 1, 1); 273 tcg_gen_sari_i64(d, a, sh); 274 tcg_gen_add_i64(d, d, t); 275 } 276 277 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 278 { 279 TCGv_vec t = tcg_temp_new_vec_matching(d); 280 TCGv_vec ones = tcg_temp_new_vec_matching(d); 281 282 tcg_gen_shri_vec(vece, t, a, sh - 1); 283 tcg_gen_dupi_vec(vece, ones, 1); 284 tcg_gen_and_vec(vece, t, t, ones); 285 tcg_gen_sari_vec(vece, d, a, sh); 286 tcg_gen_add_vec(vece, d, d, t); 287 } 288 289 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 290 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 291 { 292 static const TCGOpcode vecop_list[] = { 293 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 294 }; 295 static const GVecGen2i ops[4] = { 296 { .fni8 = gen_srshr8_i64, 297 .fniv = gen_srshr_vec, 298 .fno = gen_helper_gvec_srshr_b, 299 .opt_opc = vecop_list, 300 .vece = MO_8 }, 301 { .fni8 = gen_srshr16_i64, 302 .fniv = gen_srshr_vec, 303 .fno = gen_helper_gvec_srshr_h, 304 .opt_opc = vecop_list, 305 .vece = MO_16 }, 306 { .fni4 = gen_srshr32_i32, 307 .fniv = gen_srshr_vec, 308 .fno = gen_helper_gvec_srshr_s, 309 .opt_opc = vecop_list, 310 .vece = MO_32 }, 311 { .fni8 = gen_srshr64_i64, 312 .fniv = gen_srshr_vec, 313 .fno = gen_helper_gvec_srshr_d, 314 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 315 .opt_opc = vecop_list, 316 .vece = MO_64 }, 317 }; 318 319 /* tszimm encoding produces immediates in the range [1..esize] */ 320 tcg_debug_assert(shift > 0); 321 tcg_debug_assert(shift <= (8 << vece)); 322 323 if (shift == (8 << vece)) { 324 /* 325 * Shifts larger than the element size are architecturally valid. 326 * Signed results in all sign bits. With rounding, this produces 327 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 328 * I.e. always zero. 
329 */ 330 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); 331 } else { 332 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 333 } 334 } 335 336 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 337 { 338 TCGv_i64 t = tcg_temp_new_i64(); 339 340 gen_srshr8_i64(t, a, sh); 341 tcg_gen_vec_add8_i64(d, d, t); 342 } 343 344 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 345 { 346 TCGv_i64 t = tcg_temp_new_i64(); 347 348 gen_srshr16_i64(t, a, sh); 349 tcg_gen_vec_add16_i64(d, d, t); 350 } 351 352 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 353 { 354 TCGv_i32 t = tcg_temp_new_i32(); 355 356 gen_srshr32_i32(t, a, sh); 357 tcg_gen_add_i32(d, d, t); 358 } 359 360 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 361 { 362 TCGv_i64 t = tcg_temp_new_i64(); 363 364 gen_srshr64_i64(t, a, sh); 365 tcg_gen_add_i64(d, d, t); 366 } 367 368 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 369 { 370 TCGv_vec t = tcg_temp_new_vec_matching(d); 371 372 gen_srshr_vec(vece, t, a, sh); 373 tcg_gen_add_vec(vece, d, d, t); 374 } 375 376 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 377 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 378 { 379 static const TCGOpcode vecop_list[] = { 380 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 381 }; 382 static const GVecGen2i ops[4] = { 383 { .fni8 = gen_srsra8_i64, 384 .fniv = gen_srsra_vec, 385 .fno = gen_helper_gvec_srsra_b, 386 .opt_opc = vecop_list, 387 .load_dest = true, 388 .vece = MO_8 }, 389 { .fni8 = gen_srsra16_i64, 390 .fniv = gen_srsra_vec, 391 .fno = gen_helper_gvec_srsra_h, 392 .opt_opc = vecop_list, 393 .load_dest = true, 394 .vece = MO_16 }, 395 { .fni4 = gen_srsra32_i32, 396 .fniv = gen_srsra_vec, 397 .fno = gen_helper_gvec_srsra_s, 398 .opt_opc = vecop_list, 399 .load_dest = true, 400 .vece = MO_32 }, 401 { .fni8 = gen_srsra64_i64, 402 .fniv = gen_srsra_vec, 403 .fno = gen_helper_gvec_srsra_d, 404 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 405 .opt_opc = vecop_list, 406 .load_dest = true, 407 .vece = MO_64 }, 408 }; 409 410 /* tszimm encoding produces immediates in the range [1..esize] */ 411 tcg_debug_assert(shift > 0); 412 tcg_debug_assert(shift <= (8 << vece)); 413 414 /* 415 * Shifts larger than the element size are architecturally valid. 416 * Signed results in all sign bits. With rounding, this produces 417 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 418 * I.e. always zero. With accumulation, this leaves D unchanged. 419 */ 420 if (shift == (8 << vece)) { 421 /* Nop, but we do need to clear the tail. 
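         * (Moving Rd onto itself rewrites the opr_sz bytes unchanged
         * and, like every gvec expansion, zeroes any bytes between
         * opr_sz and max_sz.)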
*/ 422 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 423 } else { 424 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 425 } 426 } 427 428 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 429 { 430 TCGv_i64 t = tcg_temp_new_i64(); 431 432 tcg_gen_shri_i64(t, a, sh - 1); 433 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 434 tcg_gen_vec_shr8i_i64(d, a, sh); 435 tcg_gen_vec_add8_i64(d, d, t); 436 } 437 438 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 439 { 440 TCGv_i64 t = tcg_temp_new_i64(); 441 442 tcg_gen_shri_i64(t, a, sh - 1); 443 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 444 tcg_gen_vec_shr16i_i64(d, a, sh); 445 tcg_gen_vec_add16_i64(d, d, t); 446 } 447 448 void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 449 { 450 TCGv_i32 t; 451 452 /* Handle shift by the input size for the benefit of trans_URSHR_ri */ 453 if (sh == 32) { 454 tcg_gen_extract_i32(d, a, sh - 1, 1); 455 return; 456 } 457 t = tcg_temp_new_i32(); 458 tcg_gen_extract_i32(t, a, sh - 1, 1); 459 tcg_gen_shri_i32(d, a, sh); 460 tcg_gen_add_i32(d, d, t); 461 } 462 463 void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 464 { 465 TCGv_i64 t = tcg_temp_new_i64(); 466 467 tcg_gen_extract_i64(t, a, sh - 1, 1); 468 tcg_gen_shri_i64(d, a, sh); 469 tcg_gen_add_i64(d, d, t); 470 } 471 472 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) 473 { 474 TCGv_vec t = tcg_temp_new_vec_matching(d); 475 TCGv_vec ones = tcg_temp_new_vec_matching(d); 476 477 tcg_gen_shri_vec(vece, t, a, shift - 1); 478 tcg_gen_dupi_vec(vece, ones, 1); 479 tcg_gen_and_vec(vece, t, t, ones); 480 tcg_gen_shri_vec(vece, d, a, shift); 481 tcg_gen_add_vec(vece, d, d, t); 482 } 483 484 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 485 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 486 { 487 static const TCGOpcode vecop_list[] = { 488 INDEX_op_shri_vec, INDEX_op_add_vec, 0 489 }; 490 static const GVecGen2i ops[4] = { 491 { .fni8 = gen_urshr8_i64, 492 .fniv = gen_urshr_vec, 493 .fno = gen_helper_gvec_urshr_b, 494 .opt_opc = vecop_list, 495 .vece = MO_8 }, 496 { .fni8 = gen_urshr16_i64, 497 .fniv = gen_urshr_vec, 498 .fno = gen_helper_gvec_urshr_h, 499 .opt_opc = vecop_list, 500 .vece = MO_16 }, 501 { .fni4 = gen_urshr32_i32, 502 .fniv = gen_urshr_vec, 503 .fno = gen_helper_gvec_urshr_s, 504 .opt_opc = vecop_list, 505 .vece = MO_32 }, 506 { .fni8 = gen_urshr64_i64, 507 .fniv = gen_urshr_vec, 508 .fno = gen_helper_gvec_urshr_d, 509 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 510 .opt_opc = vecop_list, 511 .vece = MO_64 }, 512 }; 513 514 /* tszimm encoding produces immediates in the range [1..esize] */ 515 tcg_debug_assert(shift > 0); 516 tcg_debug_assert(shift <= (8 << vece)); 517 518 if (shift == (8 << vece)) { 519 /* 520 * Shifts larger than the element size are architecturally valid. 521 * Unsigned results in zero. With rounding, this produces a 522 * copy of the most significant bit. 
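         * E.g. for esize == 8, (x + 0x80) >> 8 equals bit 7 of x,
         * hence the plain shift by (shift - 1) below.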
523 */ 524 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); 525 } else { 526 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 527 } 528 } 529 530 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 531 { 532 TCGv_i64 t = tcg_temp_new_i64(); 533 534 if (sh == 8) { 535 tcg_gen_vec_shr8i_i64(t, a, 7); 536 } else { 537 gen_urshr8_i64(t, a, sh); 538 } 539 tcg_gen_vec_add8_i64(d, d, t); 540 } 541 542 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 543 { 544 TCGv_i64 t = tcg_temp_new_i64(); 545 546 if (sh == 16) { 547 tcg_gen_vec_shr16i_i64(t, a, 15); 548 } else { 549 gen_urshr16_i64(t, a, sh); 550 } 551 tcg_gen_vec_add16_i64(d, d, t); 552 } 553 554 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 555 { 556 TCGv_i32 t = tcg_temp_new_i32(); 557 558 if (sh == 32) { 559 tcg_gen_shri_i32(t, a, 31); 560 } else { 561 gen_urshr32_i32(t, a, sh); 562 } 563 tcg_gen_add_i32(d, d, t); 564 } 565 566 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 567 { 568 TCGv_i64 t = tcg_temp_new_i64(); 569 570 if (sh == 64) { 571 tcg_gen_shri_i64(t, a, 63); 572 } else { 573 gen_urshr64_i64(t, a, sh); 574 } 575 tcg_gen_add_i64(d, d, t); 576 } 577 578 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 579 { 580 TCGv_vec t = tcg_temp_new_vec_matching(d); 581 582 if (sh == (8 << vece)) { 583 tcg_gen_shri_vec(vece, t, a, sh - 1); 584 } else { 585 gen_urshr_vec(vece, t, a, sh); 586 } 587 tcg_gen_add_vec(vece, d, d, t); 588 } 589 590 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 591 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 592 { 593 static const TCGOpcode vecop_list[] = { 594 INDEX_op_shri_vec, INDEX_op_add_vec, 0 595 }; 596 static const GVecGen2i ops[4] = { 597 { .fni8 = gen_ursra8_i64, 598 .fniv = gen_ursra_vec, 599 .fno = gen_helper_gvec_ursra_b, 600 .opt_opc = vecop_list, 601 .load_dest = true, 602 .vece = MO_8 }, 603 { .fni8 = gen_ursra16_i64, 604 .fniv = gen_ursra_vec, 605 .fno = gen_helper_gvec_ursra_h, 606 .opt_opc = vecop_list, 607 .load_dest = true, 608 .vece = MO_16 }, 609 { .fni4 = gen_ursra32_i32, 610 .fniv = gen_ursra_vec, 611 .fno = gen_helper_gvec_ursra_s, 612 .opt_opc = vecop_list, 613 .load_dest = true, 614 .vece = MO_32 }, 615 { .fni8 = gen_ursra64_i64, 616 .fniv = gen_ursra_vec, 617 .fno = gen_helper_gvec_ursra_d, 618 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 619 .opt_opc = vecop_list, 620 .load_dest = true, 621 .vece = MO_64 }, 622 }; 623 624 /* tszimm encoding produces immediates in the range [1..esize] */ 625 tcg_debug_assert(shift > 0); 626 tcg_debug_assert(shift <= (8 << vece)); 627 628 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 629 } 630 631 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 632 { 633 uint64_t mask = dup_const(MO_8, 0xff >> shift); 634 TCGv_i64 t = tcg_temp_new_i64(); 635 636 tcg_gen_shri_i64(t, a, shift); 637 tcg_gen_andi_i64(t, t, mask); 638 tcg_gen_andi_i64(d, d, ~mask); 639 tcg_gen_or_i64(d, d, t); 640 } 641 642 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 643 { 644 uint64_t mask = dup_const(MO_16, 0xffff >> shift); 645 TCGv_i64 t = tcg_temp_new_i64(); 646 647 tcg_gen_shri_i64(t, a, shift); 648 tcg_gen_andi_i64(t, t, mask); 649 tcg_gen_andi_i64(d, d, ~mask); 650 tcg_gen_or_i64(d, d, t); 651 } 652 653 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 654 { 655 tcg_gen_shri_i32(a, a, shift); 656 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); 657 } 658 659 
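/*
 * As in the 32-bit form above, a deposit suffices for the insert:
 * the expander only uses these functions for 1 <= shift < esize,
 * so the field width (esize - shift) is always valid.  The 8- and
 * 16-bit forms build the mask by hand because the insertion must
 * be repeated in every lane packed into the 64-bit value.
 */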
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 660 { 661 tcg_gen_shri_i64(a, a, shift); 662 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); 663 } 664 665 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 666 { 667 TCGv_vec t = tcg_temp_new_vec_matching(d); 668 TCGv_vec m = tcg_temp_new_vec_matching(d); 669 670 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); 671 tcg_gen_shri_vec(vece, t, a, sh); 672 tcg_gen_and_vec(vece, d, d, m); 673 tcg_gen_or_vec(vece, d, d, t); 674 } 675 676 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 677 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 678 { 679 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; 680 const GVecGen2i ops[4] = { 681 { .fni8 = gen_shr8_ins_i64, 682 .fniv = gen_shr_ins_vec, 683 .fno = gen_helper_gvec_sri_b, 684 .load_dest = true, 685 .opt_opc = vecop_list, 686 .vece = MO_8 }, 687 { .fni8 = gen_shr16_ins_i64, 688 .fniv = gen_shr_ins_vec, 689 .fno = gen_helper_gvec_sri_h, 690 .load_dest = true, 691 .opt_opc = vecop_list, 692 .vece = MO_16 }, 693 { .fni4 = gen_shr32_ins_i32, 694 .fniv = gen_shr_ins_vec, 695 .fno = gen_helper_gvec_sri_s, 696 .load_dest = true, 697 .opt_opc = vecop_list, 698 .vece = MO_32 }, 699 { .fni8 = gen_shr64_ins_i64, 700 .fniv = gen_shr_ins_vec, 701 .fno = gen_helper_gvec_sri_d, 702 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 703 .load_dest = true, 704 .opt_opc = vecop_list, 705 .vece = MO_64 }, 706 }; 707 708 /* tszimm encoding produces immediates in the range [1..esize]. */ 709 tcg_debug_assert(shift > 0); 710 tcg_debug_assert(shift <= (8 << vece)); 711 712 /* Shift of esize leaves destination unchanged. */ 713 if (shift < (8 << vece)) { 714 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 715 } else { 716 /* Nop, but we do need to clear the tail. 
*/ 717 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 718 } 719 } 720 721 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 722 { 723 uint64_t mask = dup_const(MO_8, 0xff << shift); 724 TCGv_i64 t = tcg_temp_new_i64(); 725 726 tcg_gen_shli_i64(t, a, shift); 727 tcg_gen_andi_i64(t, t, mask); 728 tcg_gen_andi_i64(d, d, ~mask); 729 tcg_gen_or_i64(d, d, t); 730 } 731 732 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 733 { 734 uint64_t mask = dup_const(MO_16, 0xffff << shift); 735 TCGv_i64 t = tcg_temp_new_i64(); 736 737 tcg_gen_shli_i64(t, a, shift); 738 tcg_gen_andi_i64(t, t, mask); 739 tcg_gen_andi_i64(d, d, ~mask); 740 tcg_gen_or_i64(d, d, t); 741 } 742 743 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 744 { 745 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); 746 } 747 748 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 749 { 750 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); 751 } 752 753 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 754 { 755 TCGv_vec t = tcg_temp_new_vec_matching(d); 756 TCGv_vec m = tcg_temp_new_vec_matching(d); 757 758 tcg_gen_shli_vec(vece, t, a, sh); 759 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); 760 tcg_gen_and_vec(vece, d, d, m); 761 tcg_gen_or_vec(vece, d, d, t); 762 } 763 764 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 765 int64_t shift, uint32_t opr_sz, uint32_t max_sz) 766 { 767 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; 768 const GVecGen2i ops[4] = { 769 { .fni8 = gen_shl8_ins_i64, 770 .fniv = gen_shl_ins_vec, 771 .fno = gen_helper_gvec_sli_b, 772 .load_dest = true, 773 .opt_opc = vecop_list, 774 .vece = MO_8 }, 775 { .fni8 = gen_shl16_ins_i64, 776 .fniv = gen_shl_ins_vec, 777 .fno = gen_helper_gvec_sli_h, 778 .load_dest = true, 779 .opt_opc = vecop_list, 780 .vece = MO_16 }, 781 { .fni4 = gen_shl32_ins_i32, 782 .fniv = gen_shl_ins_vec, 783 .fno = gen_helper_gvec_sli_s, 784 .load_dest = true, 785 .opt_opc = vecop_list, 786 .vece = MO_32 }, 787 { .fni8 = gen_shl64_ins_i64, 788 .fniv = gen_shl_ins_vec, 789 .fno = gen_helper_gvec_sli_d, 790 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 791 .load_dest = true, 792 .opt_opc = vecop_list, 793 .vece = MO_64 }, 794 }; 795 796 /* tszimm encoding produces immediates in the range [0..esize-1]. 
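     * A shift of zero therefore degenerates to a plain copy of Rm,
     * handled separately below.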
*/ 797 tcg_debug_assert(shift >= 0); 798 tcg_debug_assert(shift < (8 << vece)); 799 800 if (shift == 0) { 801 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz); 802 } else { 803 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 804 } 805 } 806 807 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 808 { 809 gen_helper_neon_mul_u8(a, a, b); 810 gen_helper_neon_add_u8(d, d, a); 811 } 812 813 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 814 { 815 gen_helper_neon_mul_u8(a, a, b); 816 gen_helper_neon_sub_u8(d, d, a); 817 } 818 819 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 820 { 821 gen_helper_neon_mul_u16(a, a, b); 822 gen_helper_neon_add_u16(d, d, a); 823 } 824 825 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 826 { 827 gen_helper_neon_mul_u16(a, a, b); 828 gen_helper_neon_sub_u16(d, d, a); 829 } 830 831 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 832 { 833 tcg_gen_mul_i32(a, a, b); 834 tcg_gen_add_i32(d, d, a); 835 } 836 837 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 838 { 839 tcg_gen_mul_i32(a, a, b); 840 tcg_gen_sub_i32(d, d, a); 841 } 842 843 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 844 { 845 tcg_gen_mul_i64(a, a, b); 846 tcg_gen_add_i64(d, d, a); 847 } 848 849 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 850 { 851 tcg_gen_mul_i64(a, a, b); 852 tcg_gen_sub_i64(d, d, a); 853 } 854 855 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 856 { 857 tcg_gen_mul_vec(vece, a, a, b); 858 tcg_gen_add_vec(vece, d, d, a); 859 } 860 861 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 862 { 863 tcg_gen_mul_vec(vece, a, a, b); 864 tcg_gen_sub_vec(vece, d, d, a); 865 } 866 867 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops, 868 * these tables are shared with AArch64 which does support them. 
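 * Both tables set load_dest because the destination is also read as
 * the accumulator input.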
869 */ 870 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 871 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 872 { 873 static const TCGOpcode vecop_list[] = { 874 INDEX_op_mul_vec, INDEX_op_add_vec, 0 875 }; 876 static const GVecGen3 ops[4] = { 877 { .fni4 = gen_mla8_i32, 878 .fniv = gen_mla_vec, 879 .load_dest = true, 880 .opt_opc = vecop_list, 881 .vece = MO_8 }, 882 { .fni4 = gen_mla16_i32, 883 .fniv = gen_mla_vec, 884 .load_dest = true, 885 .opt_opc = vecop_list, 886 .vece = MO_16 }, 887 { .fni4 = gen_mla32_i32, 888 .fniv = gen_mla_vec, 889 .load_dest = true, 890 .opt_opc = vecop_list, 891 .vece = MO_32 }, 892 { .fni8 = gen_mla64_i64, 893 .fniv = gen_mla_vec, 894 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 895 .load_dest = true, 896 .opt_opc = vecop_list, 897 .vece = MO_64 }, 898 }; 899 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 900 } 901 902 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 903 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 904 { 905 static const TCGOpcode vecop_list[] = { 906 INDEX_op_mul_vec, INDEX_op_sub_vec, 0 907 }; 908 static const GVecGen3 ops[4] = { 909 { .fni4 = gen_mls8_i32, 910 .fniv = gen_mls_vec, 911 .load_dest = true, 912 .opt_opc = vecop_list, 913 .vece = MO_8 }, 914 { .fni4 = gen_mls16_i32, 915 .fniv = gen_mls_vec, 916 .load_dest = true, 917 .opt_opc = vecop_list, 918 .vece = MO_16 }, 919 { .fni4 = gen_mls32_i32, 920 .fniv = gen_mls_vec, 921 .load_dest = true, 922 .opt_opc = vecop_list, 923 .vece = MO_32 }, 924 { .fni8 = gen_mls64_i64, 925 .fniv = gen_mls_vec, 926 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 927 .load_dest = true, 928 .opt_opc = vecop_list, 929 .vece = MO_64 }, 930 }; 931 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 932 } 933 934 /* CMTST : test is "if (X & Y != 0)". */ 935 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 936 { 937 tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b); 938 } 939 940 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 941 { 942 tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b); 943 } 944 945 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 946 { 947 tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b); 948 } 949 950 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 951 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 952 { 953 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; 954 static const GVecGen3 ops[4] = { 955 { .fni4 = gen_helper_neon_tst_u8, 956 .fniv = gen_cmtst_vec, 957 .opt_opc = vecop_list, 958 .vece = MO_8 }, 959 { .fni4 = gen_helper_neon_tst_u16, 960 .fniv = gen_cmtst_vec, 961 .opt_opc = vecop_list, 962 .vece = MO_16 }, 963 { .fni4 = gen_cmtst_i32, 964 .fniv = gen_cmtst_vec, 965 .opt_opc = vecop_list, 966 .vece = MO_32 }, 967 { .fni8 = gen_cmtst_i64, 968 .fniv = gen_cmtst_vec, 969 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 970 .opt_opc = vecop_list, 971 .vece = MO_64 }, 972 }; 973 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 974 } 975 976 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 977 { 978 TCGv_i32 lval = tcg_temp_new_i32(); 979 TCGv_i32 rval = tcg_temp_new_i32(); 980 TCGv_i32 lsh = tcg_temp_new_i32(); 981 TCGv_i32 rsh = tcg_temp_new_i32(); 982 TCGv_i32 zero = tcg_constant_i32(0); 983 TCGv_i32 max = tcg_constant_i32(32); 984 985 /* 986 * Rely on the TCG guarantee that out of range shifts produce 987 * unspecified results, not undefined behaviour (i.e. no trap). 
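     * The USHL count is the signed low byte of the shift operand:
     * positive counts shift left, negative counts shift right, and
     * a magnitude of 32 or more must produce zero.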
988 * Discard out-of-range results after the fact. 989 */ 990 tcg_gen_ext8s_i32(lsh, shift); 991 tcg_gen_neg_i32(rsh, lsh); 992 tcg_gen_shl_i32(lval, src, lsh); 993 tcg_gen_shr_i32(rval, src, rsh); 994 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); 995 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); 996 } 997 998 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 999 { 1000 TCGv_i64 lval = tcg_temp_new_i64(); 1001 TCGv_i64 rval = tcg_temp_new_i64(); 1002 TCGv_i64 lsh = tcg_temp_new_i64(); 1003 TCGv_i64 rsh = tcg_temp_new_i64(); 1004 TCGv_i64 zero = tcg_constant_i64(0); 1005 TCGv_i64 max = tcg_constant_i64(64); 1006 1007 /* 1008 * Rely on the TCG guarantee that out of range shifts produce 1009 * unspecified results, not undefined behaviour (i.e. no trap). 1010 * Discard out-of-range results after the fact. 1011 */ 1012 tcg_gen_ext8s_i64(lsh, shift); 1013 tcg_gen_neg_i64(rsh, lsh); 1014 tcg_gen_shl_i64(lval, src, lsh); 1015 tcg_gen_shr_i64(rval, src, rsh); 1016 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); 1017 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); 1018 } 1019 1020 static void gen_ushl_vec(unsigned vece, TCGv_vec dst, 1021 TCGv_vec src, TCGv_vec shift) 1022 { 1023 TCGv_vec lval = tcg_temp_new_vec_matching(dst); 1024 TCGv_vec rval = tcg_temp_new_vec_matching(dst); 1025 TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 1026 TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 1027 TCGv_vec msk, max; 1028 1029 tcg_gen_neg_vec(vece, rsh, shift); 1030 if (vece == MO_8) { 1031 tcg_gen_mov_vec(lsh, shift); 1032 } else { 1033 msk = tcg_temp_new_vec_matching(dst); 1034 tcg_gen_dupi_vec(vece, msk, 0xff); 1035 tcg_gen_and_vec(vece, lsh, shift, msk); 1036 tcg_gen_and_vec(vece, rsh, rsh, msk); 1037 } 1038 1039 /* 1040 * Rely on the TCG guarantee that out of range shifts produce 1041 * unspecified results, not undefined behaviour (i.e. no trap). 1042 * Discard out-of-range results after the fact. 1043 */ 1044 tcg_gen_shlv_vec(vece, lval, src, lsh); 1045 tcg_gen_shrv_vec(vece, rval, src, rsh); 1046 1047 max = tcg_temp_new_vec_matching(dst); 1048 tcg_gen_dupi_vec(vece, max, 8 << vece); 1049 1050 /* 1051 * The choice of LT (signed) and GEU (unsigned) are biased toward 1052 * the instructions of the x86_64 host. For MO_8, the whole byte 1053 * is significant so we must use an unsigned compare; otherwise we 1054 * have already masked to a byte and so a signed compare works. 1055 * Other tcg hosts have a full set of comparisons and do not care. 
1056 */ 1057 if (vece == MO_8) { 1058 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); 1059 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); 1060 tcg_gen_andc_vec(vece, lval, lval, lsh); 1061 tcg_gen_andc_vec(vece, rval, rval, rsh); 1062 } else { 1063 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); 1064 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); 1065 tcg_gen_and_vec(vece, lval, lval, lsh); 1066 tcg_gen_and_vec(vece, rval, rval, rsh); 1067 } 1068 tcg_gen_or_vec(vece, dst, lval, rval); 1069 } 1070 1071 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1072 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1073 { 1074 static const TCGOpcode vecop_list[] = { 1075 INDEX_op_neg_vec, INDEX_op_shlv_vec, 1076 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 1077 }; 1078 static const GVecGen3 ops[4] = { 1079 { .fniv = gen_ushl_vec, 1080 .fno = gen_helper_gvec_ushl_b, 1081 .opt_opc = vecop_list, 1082 .vece = MO_8 }, 1083 { .fniv = gen_ushl_vec, 1084 .fno = gen_helper_gvec_ushl_h, 1085 .opt_opc = vecop_list, 1086 .vece = MO_16 }, 1087 { .fni4 = gen_ushl_i32, 1088 .fniv = gen_ushl_vec, 1089 .opt_opc = vecop_list, 1090 .vece = MO_32 }, 1091 { .fni8 = gen_ushl_i64, 1092 .fniv = gen_ushl_vec, 1093 .opt_opc = vecop_list, 1094 .vece = MO_64 }, 1095 }; 1096 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1097 } 1098 1099 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 1100 { 1101 TCGv_i32 lval = tcg_temp_new_i32(); 1102 TCGv_i32 rval = tcg_temp_new_i32(); 1103 TCGv_i32 lsh = tcg_temp_new_i32(); 1104 TCGv_i32 rsh = tcg_temp_new_i32(); 1105 TCGv_i32 zero = tcg_constant_i32(0); 1106 TCGv_i32 max = tcg_constant_i32(31); 1107 1108 /* 1109 * Rely on the TCG guarantee that out of range shifts produce 1110 * unspecified results, not undefined behaviour (i.e. no trap). 1111 * Discard out-of-range results after the fact. 1112 */ 1113 tcg_gen_ext8s_i32(lsh, shift); 1114 tcg_gen_neg_i32(rsh, lsh); 1115 tcg_gen_shl_i32(lval, src, lsh); 1116 tcg_gen_umin_i32(rsh, rsh, max); 1117 tcg_gen_sar_i32(rval, src, rsh); 1118 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); 1119 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); 1120 } 1121 1122 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 1123 { 1124 TCGv_i64 lval = tcg_temp_new_i64(); 1125 TCGv_i64 rval = tcg_temp_new_i64(); 1126 TCGv_i64 lsh = tcg_temp_new_i64(); 1127 TCGv_i64 rsh = tcg_temp_new_i64(); 1128 TCGv_i64 zero = tcg_constant_i64(0); 1129 TCGv_i64 max = tcg_constant_i64(63); 1130 1131 /* 1132 * Rely on the TCG guarantee that out of range shifts produce 1133 * unspecified results, not undefined behaviour (i.e. no trap). 1134 * Discard out-of-range results after the fact. 
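     * For the arithmetic right shift, rsh is first clamped to 63 so
     * that a count of 64 or more still replicates the sign bit, which
     * is the architectural result for a large negative shift.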
1135 */ 1136 tcg_gen_ext8s_i64(lsh, shift); 1137 tcg_gen_neg_i64(rsh, lsh); 1138 tcg_gen_shl_i64(lval, src, lsh); 1139 tcg_gen_umin_i64(rsh, rsh, max); 1140 tcg_gen_sar_i64(rval, src, rsh); 1141 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); 1142 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); 1143 } 1144 1145 static void gen_sshl_vec(unsigned vece, TCGv_vec dst, 1146 TCGv_vec src, TCGv_vec shift) 1147 { 1148 TCGv_vec lval = tcg_temp_new_vec_matching(dst); 1149 TCGv_vec rval = tcg_temp_new_vec_matching(dst); 1150 TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 1151 TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 1152 TCGv_vec tmp = tcg_temp_new_vec_matching(dst); 1153 1154 /* 1155 * Rely on the TCG guarantee that out of range shifts produce 1156 * unspecified results, not undefined behaviour (i.e. no trap). 1157 * Discard out-of-range results after the fact. 1158 */ 1159 tcg_gen_neg_vec(vece, rsh, shift); 1160 if (vece == MO_8) { 1161 tcg_gen_mov_vec(lsh, shift); 1162 } else { 1163 tcg_gen_dupi_vec(vece, tmp, 0xff); 1164 tcg_gen_and_vec(vece, lsh, shift, tmp); 1165 tcg_gen_and_vec(vece, rsh, rsh, tmp); 1166 } 1167 1168 /* Bound rsh so out of bound right shift gets -1. */ 1169 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); 1170 tcg_gen_umin_vec(vece, rsh, rsh, tmp); 1171 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); 1172 1173 tcg_gen_shlv_vec(vece, lval, src, lsh); 1174 tcg_gen_sarv_vec(vece, rval, src, rsh); 1175 1176 /* Select in-bound left shift. */ 1177 tcg_gen_andc_vec(vece, lval, lval, tmp); 1178 1179 /* Select between left and right shift. */ 1180 if (vece == MO_8) { 1181 tcg_gen_dupi_vec(vece, tmp, 0); 1182 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); 1183 } else { 1184 tcg_gen_dupi_vec(vece, tmp, 0x80); 1185 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); 1186 } 1187 } 1188 1189 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1190 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1191 { 1192 static const TCGOpcode vecop_list[] = { 1193 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, 1194 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 1195 }; 1196 static const GVecGen3 ops[4] = { 1197 { .fniv = gen_sshl_vec, 1198 .fno = gen_helper_gvec_sshl_b, 1199 .opt_opc = vecop_list, 1200 .vece = MO_8 }, 1201 { .fniv = gen_sshl_vec, 1202 .fno = gen_helper_gvec_sshl_h, 1203 .opt_opc = vecop_list, 1204 .vece = MO_16 }, 1205 { .fni4 = gen_sshl_i32, 1206 .fniv = gen_sshl_vec, 1207 .opt_opc = vecop_list, 1208 .vece = MO_32 }, 1209 { .fni8 = gen_sshl_i64, 1210 .fniv = gen_sshl_vec, 1211 .opt_opc = vecop_list, 1212 .vece = MO_64 }, 1213 }; 1214 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1215 } 1216 1217 void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1218 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1219 { 1220 static gen_helper_gvec_3 * const fns[] = { 1221 gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h, 1222 gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d, 1223 }; 1224 tcg_debug_assert(vece <= MO_64); 1225 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1226 } 1227 1228 void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1229 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1230 { 1231 static gen_helper_gvec_3 * const fns[] = { 1232 gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h, 1233 gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d, 1234 }; 1235 tcg_debug_assert(vece <= MO_64); 1236 
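    /*
     * Like SRSHL above, URSHL has no inline vector expansion and
     * always expands out of line through the helpers.
     */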
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
        gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
        gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h,
        gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h,
        gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
                        tcg_constant_i64(UINT64_MAX), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
.opt_opc = vecop_list, 1340 .vece = MO_32 }, 1341 { .fniv = gen_uqadd_vec, 1342 .fni8 = gen_uqadd_d, 1343 .fno = gen_helper_gvec_uqadd_d, 1344 .write_aofs = true, 1345 .opt_opc = vecop_list, 1346 .vece = MO_64 }, 1347 }; 1348 1349 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 1350 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 1351 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1352 } 1353 1354 void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1355 { 1356 int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1357 int64_t min = -1ll - max; 1358 TCGv_i64 tmp = tcg_temp_new_i64(); 1359 1360 tcg_gen_add_i64(tmp, a, b); 1361 tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1362 tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1363 tcg_gen_xor_i64(tmp, tmp, res); 1364 tcg_gen_or_i64(qc, qc, tmp); 1365 } 1366 1367 void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1368 { 1369 TCGv_i64 t0 = tcg_temp_new_i64(); 1370 TCGv_i64 t1 = tcg_temp_new_i64(); 1371 TCGv_i64 t2 = tcg_temp_new_i64(); 1372 1373 tcg_gen_add_i64(t0, a, b); 1374 1375 /* Compute signed overflow indication into T1 */ 1376 tcg_gen_xor_i64(t1, a, b); 1377 tcg_gen_xor_i64(t2, t0, a); 1378 tcg_gen_andc_i64(t1, t2, t1); 1379 1380 /* Compute saturated value into T2 */ 1381 tcg_gen_sari_i64(t2, a, 63); 1382 tcg_gen_xori_i64(t2, t2, INT64_MAX); 1383 1384 tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1385 tcg_gen_xor_i64(t0, t0, res); 1386 tcg_gen_or_i64(qc, qc, t0); 1387 } 1388 1389 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 1390 TCGv_vec a, TCGv_vec b) 1391 { 1392 TCGv_vec x = tcg_temp_new_vec_matching(t); 1393 tcg_gen_add_vec(vece, x, a, b); 1394 tcg_gen_ssadd_vec(vece, t, a, b); 1395 tcg_gen_xor_vec(vece, x, x, t); 1396 tcg_gen_or_vec(vece, qc, qc, x); 1397 } 1398 1399 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1400 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1401 { 1402 static const TCGOpcode vecop_list[] = { 1403 INDEX_op_ssadd_vec, INDEX_op_add_vec, 0 1404 }; 1405 static const GVecGen4 ops[4] = { 1406 { .fniv = gen_sqadd_vec, 1407 .fno = gen_helper_gvec_sqadd_b, 1408 .opt_opc = vecop_list, 1409 .write_aofs = true, 1410 .vece = MO_8 }, 1411 { .fniv = gen_sqadd_vec, 1412 .fno = gen_helper_gvec_sqadd_h, 1413 .opt_opc = vecop_list, 1414 .write_aofs = true, 1415 .vece = MO_16 }, 1416 { .fniv = gen_sqadd_vec, 1417 .fno = gen_helper_gvec_sqadd_s, 1418 .opt_opc = vecop_list, 1419 .write_aofs = true, 1420 .vece = MO_32 }, 1421 { .fniv = gen_sqadd_vec, 1422 .fni8 = gen_sqadd_d, 1423 .fno = gen_helper_gvec_sqadd_d, 1424 .opt_opc = vecop_list, 1425 .write_aofs = true, 1426 .vece = MO_64 }, 1427 }; 1428 1429 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 1430 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 1431 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1432 } 1433 1434 void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1435 { 1436 TCGv_i64 tmp = tcg_temp_new_i64(); 1437 1438 tcg_gen_sub_i64(tmp, a, b); 1439 tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0)); 1440 tcg_gen_xor_i64(tmp, tmp, res); 1441 tcg_gen_or_i64(qc, qc, tmp); 1442 } 1443 1444 void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1445 { 1446 TCGv_i64 t = tcg_temp_new_i64(); 1447 1448 tcg_gen_sub_i64(t, a, b); 1449 tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t); 1450 tcg_gen_xor_i64(t, t, res); 1451 tcg_gen_or_i64(qc, qc, t); 1452 } 1453 1454 
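/*
 * As with the other saturating expansions in this file, saturation is
 * detected by computing both the wrapped result (sub_vec) and the
 * saturated result (ussub_vec) and ORing their XOR into the sticky
 * QC accumulator: the two differ exactly when the operation saturated.
 */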
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 1455 TCGv_vec a, TCGv_vec b) 1456 { 1457 TCGv_vec x = tcg_temp_new_vec_matching(t); 1458 tcg_gen_sub_vec(vece, x, a, b); 1459 tcg_gen_ussub_vec(vece, t, a, b); 1460 tcg_gen_xor_vec(vece, x, x, t); 1461 tcg_gen_or_vec(vece, qc, qc, x); 1462 } 1463 1464 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1465 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1466 { 1467 static const TCGOpcode vecop_list[] = { 1468 INDEX_op_ussub_vec, INDEX_op_sub_vec, 0 1469 }; 1470 static const GVecGen4 ops[4] = { 1471 { .fniv = gen_uqsub_vec, 1472 .fno = gen_helper_gvec_uqsub_b, 1473 .opt_opc = vecop_list, 1474 .write_aofs = true, 1475 .vece = MO_8 }, 1476 { .fniv = gen_uqsub_vec, 1477 .fno = gen_helper_gvec_uqsub_h, 1478 .opt_opc = vecop_list, 1479 .write_aofs = true, 1480 .vece = MO_16 }, 1481 { .fniv = gen_uqsub_vec, 1482 .fno = gen_helper_gvec_uqsub_s, 1483 .opt_opc = vecop_list, 1484 .write_aofs = true, 1485 .vece = MO_32 }, 1486 { .fniv = gen_uqsub_vec, 1487 .fni8 = gen_uqsub_d, 1488 .fno = gen_helper_gvec_uqsub_d, 1489 .opt_opc = vecop_list, 1490 .write_aofs = true, 1491 .vece = MO_64 }, 1492 }; 1493 1494 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 1495 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 1496 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1497 } 1498 1499 void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1500 { 1501 int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1502 int64_t min = -1ll - max; 1503 TCGv_i64 tmp = tcg_temp_new_i64(); 1504 1505 tcg_gen_sub_i64(tmp, a, b); 1506 tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1507 tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1508 tcg_gen_xor_i64(tmp, tmp, res); 1509 tcg_gen_or_i64(qc, qc, tmp); 1510 } 1511 1512 void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1513 { 1514 TCGv_i64 t0 = tcg_temp_new_i64(); 1515 TCGv_i64 t1 = tcg_temp_new_i64(); 1516 TCGv_i64 t2 = tcg_temp_new_i64(); 1517 1518 tcg_gen_sub_i64(t0, a, b); 1519 1520 /* Compute signed overflow indication into T1 */ 1521 tcg_gen_xor_i64(t1, a, b); 1522 tcg_gen_xor_i64(t2, t0, a); 1523 tcg_gen_and_i64(t1, t1, t2); 1524 1525 /* Compute saturated value into T2 */ 1526 tcg_gen_sari_i64(t2, a, 63); 1527 tcg_gen_xori_i64(t2, t2, INT64_MAX); 1528 1529 tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1530 tcg_gen_xor_i64(t0, t0, res); 1531 tcg_gen_or_i64(qc, qc, t0); 1532 } 1533 1534 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 1535 TCGv_vec a, TCGv_vec b) 1536 { 1537 TCGv_vec x = tcg_temp_new_vec_matching(t); 1538 tcg_gen_sub_vec(vece, x, a, b); 1539 tcg_gen_sssub_vec(vece, t, a, b); 1540 tcg_gen_xor_vec(vece, x, x, t); 1541 tcg_gen_or_vec(vece, qc, qc, x); 1542 } 1543 1544 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1545 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1546 { 1547 static const TCGOpcode vecop_list[] = { 1548 INDEX_op_sssub_vec, INDEX_op_sub_vec, 0 1549 }; 1550 static const GVecGen4 ops[4] = { 1551 { .fniv = gen_sqsub_vec, 1552 .fno = gen_helper_gvec_sqsub_b, 1553 .opt_opc = vecop_list, 1554 .write_aofs = true, 1555 .vece = MO_8 }, 1556 { .fniv = gen_sqsub_vec, 1557 .fno = gen_helper_gvec_sqsub_h, 1558 .opt_opc = vecop_list, 1559 .write_aofs = true, 1560 .vece = MO_16 }, 1561 { .fniv = gen_sqsub_vec, 1562 .fno = gen_helper_gvec_sqsub_s, 1563 .opt_opc = vecop_list, 1564 .write_aofs = true, 1565 .vece = MO_32 }, 1566 { .fniv = 
gen_sqsub_vec, 1567 .fni8 = gen_sqsub_d, 1568 .fno = gen_helper_gvec_sqsub_d, 1569 .opt_opc = vecop_list, 1570 .write_aofs = true, 1571 .vece = MO_64 }, 1572 }; 1573 1574 tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 1575 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 1576 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1577 } 1578 1579 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1580 { 1581 TCGv_i32 t = tcg_temp_new_i32(); 1582 1583 tcg_gen_sub_i32(t, a, b); 1584 tcg_gen_sub_i32(d, b, a); 1585 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); 1586 } 1587 1588 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1589 { 1590 TCGv_i64 t = tcg_temp_new_i64(); 1591 1592 tcg_gen_sub_i64(t, a, b); 1593 tcg_gen_sub_i64(d, b, a); 1594 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); 1595 } 1596 1597 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1598 { 1599 TCGv_vec t = tcg_temp_new_vec_matching(d); 1600 1601 tcg_gen_smin_vec(vece, t, a, b); 1602 tcg_gen_smax_vec(vece, d, a, b); 1603 tcg_gen_sub_vec(vece, d, d, t); 1604 } 1605 1606 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1607 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1608 { 1609 static const TCGOpcode vecop_list[] = { 1610 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 1611 }; 1612 static const GVecGen3 ops[4] = { 1613 { .fniv = gen_sabd_vec, 1614 .fno = gen_helper_gvec_sabd_b, 1615 .opt_opc = vecop_list, 1616 .vece = MO_8 }, 1617 { .fniv = gen_sabd_vec, 1618 .fno = gen_helper_gvec_sabd_h, 1619 .opt_opc = vecop_list, 1620 .vece = MO_16 }, 1621 { .fni4 = gen_sabd_i32, 1622 .fniv = gen_sabd_vec, 1623 .fno = gen_helper_gvec_sabd_s, 1624 .opt_opc = vecop_list, 1625 .vece = MO_32 }, 1626 { .fni8 = gen_sabd_i64, 1627 .fniv = gen_sabd_vec, 1628 .fno = gen_helper_gvec_sabd_d, 1629 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1630 .opt_opc = vecop_list, 1631 .vece = MO_64 }, 1632 }; 1633 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 1634 } 1635 1636 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1637 { 1638 TCGv_i32 t = tcg_temp_new_i32(); 1639 1640 tcg_gen_sub_i32(t, a, b); 1641 tcg_gen_sub_i32(d, b, a); 1642 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); 1643 } 1644 1645 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1646 { 1647 TCGv_i64 t = tcg_temp_new_i64(); 1648 1649 tcg_gen_sub_i64(t, a, b); 1650 tcg_gen_sub_i64(d, b, a); 1651 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); 1652 } 1653 1654 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1655 { 1656 TCGv_vec t = tcg_temp_new_vec_matching(d); 1657 1658 tcg_gen_umin_vec(vece, t, a, b); 1659 tcg_gen_umax_vec(vece, d, a, b); 1660 tcg_gen_sub_vec(vece, d, d, t); 1661 } 1662 1663 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1664 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1665 { 1666 static const TCGOpcode vecop_list[] = { 1667 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 1668 }; 1669 static const GVecGen3 ops[4] = { 1670 { .fniv = gen_uabd_vec, 1671 .fno = gen_helper_gvec_uabd_b, 1672 .opt_opc = vecop_list, 1673 .vece = MO_8 }, 1674 { .fniv = gen_uabd_vec, 1675 .fno = gen_helper_gvec_uabd_h, 1676 .opt_opc = vecop_list, 1677 .vece = MO_16 }, 1678 { .fni4 = gen_uabd_i32, 1679 .fniv = gen_uabd_vec, 1680 .fno = gen_helper_gvec_uabd_s, 1681 .opt_opc = vecop_list, 1682 .vece = MO_32 }, 1683 { .fni8 = gen_uabd_i64, 1684 .fniv = gen_uabd_vec, 1685 .fno = 
          gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Signed absolute difference and accumulate: d += abs(a - b). */
static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Unsigned absolute difference and accumulate: d += abs(a - b). */
static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Pairwise add/max/min are implemented with out-of-line helpers only. */
void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/*
 * Signed halving add: compute (a + b) >> 1 per element without widening,
 * using (a >> 1) + (b >> 1) + (a & b & 1).
 */
static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_shadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_shadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_and_i32(t, a, b);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_shadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_and_vec(vece, t, a, b);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_shadd8_i64,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shadd16_i64,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shadd_i32,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/* Unsigned halving add: as above, but with logical shifts. */
static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_and_i32(t, a, b);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_and_vec(vece, t, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_uhadd8_i64,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_uhadd16_i64,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uhadd_i32,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/*
 * Signed halving subtract: compute (a - b) >> 1 per element without
 * widening, using (a >> 1) - (b >> 1) - (~a & b & 1).
 */
static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, t);
}

static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, t);
}

static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_andc_i32(t, b, a);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, t);
}

static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, a);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 g[4] = {
        { .fni8 = gen_shsub8_i64,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shsub16_i64,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shsub_i32,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/* Unsigned halving subtract: as above, but with logical shifts. */
static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, t);
}

static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, t);
}

static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_andc_i32(t, b, a);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, t);
}

static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, a);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 g[4] = {
        { .fni8 = gen_uhsub8_i64,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_uhsub16_i64,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uhsub_i32,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/*
 * Signed rounding halving add: compute (a + b + 1) >> 1 per element
 * without widening, using (a >> 1) + (b >> 1) + ((a | b) & 1).
 */
static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_or_i32(t, a, b);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_or_vec(vece, t, a, b);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_srhadd8_i64,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srhadd16_i64,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srhadd_i32,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/* Unsigned rounding halving add: as above, but with logical shifts. */
static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_or_i32(t, a, b);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_or_vec(vece, t, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_urhadd8_i64,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urhadd16_i64,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urhadd_i32,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}