/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"


static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

#define GEN_CMP0(NAME, COND)                                        \
    void NAME(unsigned vece, uint32_t d, uint32_t m,                \
              uint32_t opr_sz, uint32_t max_sz)                     \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0

static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits.  With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.  With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero.  With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

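/*
 * For the 32-bit and 64-bit SRI helpers (above and below), a single
 * deposit inserts the shifted-in field while preserving the top
 * @shift bits of the destination.  The 8-bit and 16-bit helpers
 * operate on elements packed within an i64, so they build a
 * replicated mask with dup_const instead.
 */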
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64, which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if ((X & Y) != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}

void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) is biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift. */
    if (vece == MO_8) {
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}

void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sshl_i32,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sshl_i64,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
        gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
        gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
        gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
        gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h,
        gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h,
        gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
                        tcg_constant_i64(UINT64_MAX), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = gen_uqadd_vec,
          .fni8 = gen_uqadd_d,
          .fno = gen_helper_gvec_uqadd_d,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_add_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_andc_i64(t1, t2, t1);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fni8 = gen_sqadd_d,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fni8 = gen_uqsub_d,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_and_i64(t1, t1, t2);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fni8 = gen_sqsub_d,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

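/*
 * As for the other pairwise max/min expanders above, there is no
 * 64-bit element form: the pairwise max/min instructions only provide
 * byte, halfword and word elements, so only three helpers exist and
 * vece is asserted to be at most MO_32.
 */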
void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}