/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg.h"
#include "tcg-op.h"
#include "tcg-mo.h"

/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
#ifdef CONFIG_DEBUG_TCG
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
}
#endif
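/*
 * Illustrative sketch (not part of the build): a front end using the
 * .fniv hook typically declares the optional opcodes its expansion
 * emits and hands them to the gvec expander, which consults
 * tcg_can_emit_vecop_list() below.  The names "example_list" and
 * "gen_example_vec" here are hypothetical; .fniv, .opt_opc and .vece
 * are the real GVecGen2 members.
 *
 *     static const TCGOpcode example_list[] = {
 *         INDEX_op_smax_vec, INDEX_op_sari_vec, 0
 *     };
 *     static const GVecGen2 g = {
 *         .fniv = gen_example_vec,
 *         .opt_opc = example_list,
 *         .vece = MO_32,
 *     };
 */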
bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dupi_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}

void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}
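/*
 * Worked example of the encoding above: emitting a 128-bit vector
 * add over 32-bit lanes,
 *
 *     vec_gen_3(INDEX_op_add_vec, TCG_TYPE_V128, MO_32, ri, ai, bi);
 *
 * stores TCGOP_VECL(op) = TCG_TYPE_V128 - TCG_TYPE_V64 = 1 and
 * TCGOP_VECE(op) = MO_32, i.e. a four-lane i32 addition.
 */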
static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

#define MO_REG  (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)

static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
}

TCGv_vec tcg_const_zeros_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, 0);
    return ret;
}

TCGv_vec tcg_const_ones_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, -1);
    return ret;
}

TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_zeros_vec(t->base_type);
}

TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_ones_vec(t->base_type);
}

void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
{
    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
        do_dupi_vec(r, MO_32, a);
    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
        do_dupi_vec(r, MO_64, a);
    } else {
        TCGv_i64 c = tcg_const_i64(a);
        tcg_gen_dup_i64_vec(MO_64, r, c);
        tcg_temp_free_i64(c);
    }
}

void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
}

void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
}

void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(vece, a));
}
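/*
 * Worked example: dup_const() replicates the immediate across a host
 * register before the dupi is emitted, so on a 64-bit host
 *
 *     tcg_gen_dup16i_vec(r, 0x1234);
 *
 * becomes do_dupi_vec(r, MO_64, 0x1234123412341234), and the backend
 * broadcasts that value into each 64-bit slice of r.
 */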
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend that
       supports it.  */
    tcg_gen_and_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend that
       supports it.  */
    tcg_gen_or_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend that
       supports it.  */
    tcg_gen_xor_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}
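/*
 * The expansions below rest on simple identities, collected here:
 *
 *     not(a) = xor(a, -1)                  -- tcg_gen_not_vec
 *     neg(a) = sub(0, a)                   -- tcg_gen_neg_vec
 *     abs(a) = (a ^ s) - s, where s = a >> (lane_bits - 1)
 *                                          -- tcg_gen_abs_vec
 */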
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        TCGv_vec t = tcg_const_ones_vec_matching(r);
        tcg_gen_xor_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        TCGv_vec t = tcg_const_zeros_vec_matching(r);
        tcg_gen_sub_vec(vece, r, t, a);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                do_dupi_vec(t, MO_REG, 0);
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t);
            }
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
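/*
 * Worked example for the sign-mask path in tcg_gen_abs_vec, with
 * vece = MO_8 and lane value a = -5 (0xfb):
 *
 *     s = a >> 7   = 0xff   (all ones; a is negative)
 *     a ^ s        = 0x04   (~a)
 *     (a ^ s) - s  = 0x05   (~a + 1 = -a)
 *
 * For non-negative lanes s is zero and both steps are no-ops.
 */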
static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift more easily than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}

static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
                          TCGv_vec b, TCGOpcode opc)
{
    bool ok = do_op3(vece, r, a, b, opc);
    tcg_debug_assert(ok);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}

void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
}

void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec);
}
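/*
 * Min/max are reduced to cmpsel when the host lacks direct support:
 * do_minmax() below turns smin(a, b) into the lane-wise selection
 * (a < b ? a : b).  For example,
 *
 *     tcg_gen_smin_vec(MO_16, r, a, b);
 *
 * either emits INDEX_op_smin_vec directly or falls back to the
 * equivalent cmp_vec + bitsel_vec sequence via tcg_gen_cmpsel_vec.
 */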
static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
    }
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc_s);
    hold_list = tcg_swap_vecop_list(NULL);

    can = tcg_can_emit_vec_op(opc_s, type, vece);
    if (can > 0) {
        vec_gen_3(opc_s, type, vece, ri, ai, si);
    } else if (can < 0) {
        tcg_expand_vec_op(opc_s, type, vece, ri, ai, si);
    } else {
        TCGv_vec vec_s = tcg_temp_new_vec(type);

        if (vece == MO_64) {
            TCGv_i64 s64 = tcg_temp_new_i64();
            tcg_gen_extu_i32_i64(s64, s);
            tcg_gen_dup_i64_vec(MO_64, vec_s, s64);
            tcg_temp_free_i64(s64);
        } else {
            tcg_gen_dup_i32_vec(vece, vec_s, s);
        }
        do_op3_nofail(vece, r, a, vec_s, opc_v);
        tcg_temp_free_vec(vec_s);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec);
}

void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
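/*
 * For reference, the selection primitives above compute, lane-wise:
 *
 *     bitsel(a, b, c)          = (b & a) | (c & ~a)   -- a is a bit mask
 *     cmpsel(cond, a, b, c, d) = cond(a, b) ? c : d
 *
 * so a guest conditional such as "r = x < y ? x : y" could be emitted
 * as (illustrative only):
 *
 *     tcg_gen_cmpsel_vec(TCG_COND_LT, MO_32, r, x, y, x, y);
 */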