/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"


static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

#define GEN_CMP0(NAME, COND)                                        \
    void NAME(unsigned vece, uint32_t d, uint32_t m,                \
              uint32_t opr_sz, uint32_t max_sz)                     \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0

static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

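/*
 * A note on the expansion tables used throughout this file: each
 * GVecGen2i/GVecGen3/GVecGen4 entry names an out-of-line helper (.fno)
 * plus, where profitable, inline expansions over 32-bit (.fni4), 64-bit
 * (.fni8) or host-vector (.fniv) values.  .opt_opc lists the vector
 * opcodes the .fniv expansion relies on, and .load_dest marks operations
 * that also read their destination.  See tcg/tcg-op-gvec.h for details.
 */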
void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
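/*
 * Concretely, for shift counts less than the element size the rounded
 * shift (x + (1 << (sh - 1))) >> sh is equal to
 * (x >> sh) + ((x >> (sh - 1)) & 1), which is what the helpers below
 * compute; doing it this way avoids widening the intermediate sum.
 */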
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits.  With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.  With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero.  With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

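/*
 * Shift-right-and-insert: the top SHIFT bits of each destination element
 * are preserved and the shifted source is inserted into the remainder.
 * For 8 and 16-bit elements this is done with masks built via dup_const;
 * for 32 and 64-bit elements each element fills a whole TCG value, so a
 * single deposit into the low (esize - shift) bits suffices.
 */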
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
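        /*
         * Clearing the tail means zeroing any bytes between opr_sz and
         * max_sz; a gvec move with identical source and destination does
         * exactly that while leaving the active bytes untouched.
         */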
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

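    /*
     * With a zero shift, SLI inserts the entire element and so degenerates
     * to a plain copy of the source vector.
     */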
    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if ((X & Y) != 0)". */
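/*
 * Each element of the result is all ones when the test is true and all
 * zeros when it is false, which is why the expansions below use
 * negsetcond / cmp rather than a plain setcond.
 */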
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}

void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

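/*
 * USHL/SSHL take the per-element shift count from the low byte of the
 * corresponding element of the shift operand, as a signed value: positive
 * counts shift left, negative counts shift right.  Counts whose magnitude
 * is the element size or more produce zero for USHL and for SSHL left
 * shifts, and a copy of the sign bit for SSHL right shifts.
 */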
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

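/*
 * For SSHL the right shift is arithmetic, so instead of discarding an
 * out-of-range result it is clamped to esize - 1, which propagates the
 * sign bit just as an over-large arithmetic shift would.
 */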
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift. */
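    /*
     * For MO_8 the shift operand is the whole element, so a signed compare
     * against zero identifies right shifts.  For wider elements the count
     * was zero-extended from a byte by the masking above, so right shifts
     * are instead the values with bit 7 set, i.e. those not less than 0x80.
     */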
    if (vece == MO_8) {
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}

void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sshl_i32,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sshl_i64,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

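/*
 * The saturating helpers below compute both the saturated and the plain
 * result of each operation; the XOR of the two is nonzero exactly in the
 * elements that saturated, and is ORed into the QC accumulator at
 * offsetof(CPUARMState, vfp.qc), which is later folded into FPSCR/FPSR.QC.
 */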
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_d,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

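/*
 * Unlike ADDP, the integer pairwise max/min operations exist only for
 * byte, halfword and word elements, so the tables below have no 64-bit
 * entry and the callers assert vece <= MO_32.
 */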
void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}