/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"


static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

#define GEN_CMP0(NAME, COND)                                        \
    void NAME(unsigned vece, uint32_t d, uint32_t m,                \
              uint32_t opr_sz, uint32_t max_sz)                     \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0

static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
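
/*
 * The GVecGen tables below each describe one operation at the four element
 * sizes and offer up to three expansions: an inline integer fallback
 * (.fni4/.fni8), a TCG vector expansion (.fniv) whose required vector
 * opcodes are listed in .opt_opc, and an out-of-line helper (.fno).
 */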
void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
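/*
 * Worked example: a rounding shift right of 6 by 2 computes
 * (6 >> 2) + ((6 >> 1) & 1) = 1 + 1 = 2, i.e. 6/4 = 1.5 rounded to 2
 * (ties round towards +infinity).
 */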
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits. With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
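
/*
 * SRSRA: rounding shift right and accumulate.  These expanders reuse the
 * SRSHR helpers above and add the rounded result into the destination.
 */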
static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits. With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero. With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero. With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
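
/*
 * URSRA: as URSHR but accumulating into the destination.  Unlike plain
 * USRA, a shift by esize is not a nop here: the rounding bit (the
 * element's most significant bit) is still added, so the expanders
 * below special-case sh == esize.
 */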
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}
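
/*
 * As above for 32-bit: the 64-bit shift-right-and-insert writes the low
 * 64 - shift bits with a single deposit, leaving the top 'shift' bits of
 * the destination intact.
 */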
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}
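
/*
 * SLI: shift left and insert, the mirror image of SRI.  The value is
 * shifted left and the low 'shift' bits of the destination are preserved.
 */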
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if (X & Y != 0)". */
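/*
 * negsetcond yields all-ones (-1) when the condition holds and zero
 * otherwise, matching the all-ones/all-zeros element result that the
 * architecture specifies for vector compares.
 */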
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}

void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
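
/*
 * Signed variant: a negative shift count selects an arithmetic right
 * shift.  The right-shift amount is clamped with umin so that an
 * out-of-range right shift still yields all sign bits (-1 or 0) rather
 * than an unspecified value.
 */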
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift. */
    if (vece == MO_8) {
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}

void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sshl_i32,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sshl_i64,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_d,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
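
/*
 * As with the saturating additions above, the subtract expanders compute
 * both the wrapping and the saturating result and OR any difference into
 * the QC accumulation vector (vfp.qc), which records cumulative saturation.
 */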
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}
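
/* Vector absolute difference, computed as max(a, b) - min(a, b). */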
static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}