/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"


static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

#define GEN_CMP0(NAME, COND)                                    \
    void NAME(unsigned vece, uint32_t d, uint32_t m,            \
              uint32_t opr_sz, uint32_t max_sz)                 \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0

static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
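    /*
     * A note on the GVecGen2i tables used throughout this file: .fni8 and
     * .fni4 give per-64-bit/per-32-bit integer expansions, .fniv gives the
     * expansion using native TCG vector ops (requiring the opcodes listed
     * in .opt_opc), and .fno is the out-of-line helper fallback;
     * .load_dest marks accumulating operations that also read Rd.
     */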
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * A signed shift results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * An unsigned shift results in all zeros as input to the accumulate,
     * so this is a nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

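/*
 * A worked example of the rounding shifts expanded below: with sh == 2 and
 * an element value of 7, the rounding bit is (7 >> 1) & 1 == 1, so the
 * result is (7 >> 2) + 1 == 2, i.e. 7/4 rounded to nearest.
 */
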
/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * A signed shift results in all sign bits.  With rounding, this
         * produces (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * A signed shift results in all sign bits.  With rounding, this
     * produces (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
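        /*
         * Moving a vector onto itself leaves the first opr_sz bytes
         * unchanged, and the gvec expansion zeroes everything from
         * opr_sz up to max_sz, which is exactly the tail clearing needed.
         */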
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * An unsigned shift results in zero.  With rounding, this produces
         * a copy of the most significant bit.
         */
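        /*
         * That rounding bit is (x >> (esize - 1)) & 1, which is exactly
         * what the plain unsigned shift by shift - 1 below computes.
         */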
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

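/*
 * Note that the expansions above are free to clobber their second operand:
 * the gvec expansion calls them with per-lane temporaries, not with the
 * canonical register values, so overwriting 'a' with the product is safe.
 */
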
/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if ((X & Y) != 0)". */
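/*
 * For example, with byte elements cmtst(0x0f, 0x30) is 0x00, while
 * cmtst(0x0f, 0x31) is 0xff: any common set bit yields all ones.
 */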
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}

void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
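    /*
     * The shift operand is interpreted as a signed byte: positive counts
     * shift left, negative counts shift right, and a magnitude of 64 or
     * more yields zero (informally, ushl(x, -3) == x >> 3).
     */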
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) is biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift. */
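    /*
     * For MO_8 the shift operand is still the raw byte, so a signed compare
     * with zero detects a right shift; for wider elements the byte has been
     * masked into bits [7:0], so its sign bit now sits in bit 7 and the
     * compare is against 0x80 instead.
     */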
    if (vece == MO_8) {
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}

void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sshl_i32,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sshl_i64,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
        gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
        gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
        gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
        gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
                        tcg_constant_i64(UINT64_MAX), t);
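    /*
     * 't' still holds the wrapped sum; any bit by which it differs from
     * the saturated result is accumulated into QC below.
     */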
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = gen_uqadd_vec,
          .fni8 = gen_uqadd_d,
          .fno = gen_helper_gvec_uqadd_d,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_add_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_andc_i64(t1, t2, t1);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fni8 = gen_sqadd_d,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fni8 = gen_uqsub_d,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_and_i64(t1, t1, t2);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fni8 = gen_sqsub_d,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}