/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg/tcg.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"

/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
#ifdef CONFIG_DEBUG_TCG
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
}
#endif
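
/*
 * For illustration, a hypothetical front end composing a custom
 * operation from optional opcodes would pair its .fniv callback
 * with the opcodes that callback emits (names here are made up):
 *
 *     static const TCGOpcode vecop_list_foo[] = {
 *         INDEX_op_sari_vec, INDEX_op_sub_vec, 0
 *     };
 *     static const GVecGen2 op_foo = {
 *         .fniv = gen_foo_vec,
 *         .opt_opc = vecop_list_foo,
 *         .vece = MO_32,
 *     };
 *
 * The gvec expander installs .opt_opc via tcg_swap_vecop_list()
 * around the call to .fniv, so the assertion above can verify that
 * gen_foo_vec emits only the opcodes it declared.
 */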
bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dupi_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        case INDEX_op_not_vec:
            /* These opcodes have generic expansions using the above.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}

void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* There must be enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* There must be enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

#define MO_REG  (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)
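
/*
 * Note: the dupi expanders below rely on dup_const() from tcg.h,
 * which replicates a constant across a 64-bit value for the given
 * element size; for example, dup_const(MO_8, 0x3f) evaluates to
 * 0x3f3f3f3f3f3f3f3full.  MO_REG above is the widest element that
 * fits in one host register.
 */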
static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
}

TCGv_vec tcg_const_zeros_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, 0);
    return ret;
}

TCGv_vec tcg_const_ones_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, -1);
    return ret;
}

TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_zeros_vec(t->base_type);
}

TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_ones_vec(t->base_type);
}

void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
{
    if (TCG_TARGET_REG_BITS == 64) {
        do_dupi_vec(r, MO_64, a);
    } else if (a == dup_const(MO_32, a)) {
        do_dupi_vec(r, MO_32, a);
    } else {
        TCGv_i64 c = tcg_const_i64(a);
        tcg_gen_dup_i64_vec(MO_64, r, c);
        tcg_temp_free_i64(c);
    }
}

void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
}

void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
}

void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    if (vece == MO_64) {
        tcg_gen_dup64i_vec(r, a);
    } else {
        do_dupi_vec(r, MO_REG, dup_const(vece, a));
    }
}

void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}
void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nand_vec when a backend supports it. */
    tcg_gen_and_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nor_vec when a backend supports it. */
    tcg_gen_or_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}
void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_eqv_vec when a backend supports it. */
    tcg_gen_xor_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        TCGv_vec t = tcg_const_ones_vec_matching(r);
        tcg_gen_xor_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        TCGv_vec t = tcg_const_zeros_vec_matching(r);
        tcg_gen_sub_vec(vece, r, t, a);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            /* Otherwise, compute the sign mask t of a per element;
               then abs(a) = (a ^ t) - t.  */
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                do_dupi_vec(t, MO_REG, 0);
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t);
            }
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift more easily than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}

void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    int bits = 8 << vece;

    /* Rotate right by i is rotate left by (bits - i) mod bits.  */
    tcg_debug_assert(i >= 0 && i < bits);
    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
}
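
/*
 * Note for readers: vector comparisons produce a full element mask;
 * each element of the result is all ones where the condition holds
 * and all zeros where it does not.  This is what lets the min/max
 * and cmpsel expansions below compose cmp_vec with bitsel_vec.
 * For example (illustrative only):
 *
 *     tcg_gen_cmp_vec(TCG_COND_LT, MO_32, m, x, y);
 *
 * leaves 0xffffffff in each 32-bit lane of m where x < y.
 */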
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}

static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
                          TCGv_vec b, TCGOpcode opc)
{
    bool ok = do_op3(vece, r, a, b, opc);
    tcg_debug_assert(ok);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}
void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
}

void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec);
}

static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}

void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}

static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, si);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
        tcg_swap_vecop_list(hold_list);
    } else {
        g_assert_not_reached();
    }
}

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}
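
/*
 * bitsel computes r = (a & b) | (~a & c) bitwise: each result bit is
 * taken from b where the corresponding bit of a is set, and from c
 * where it is clear.  The fallback expansion below spells out exactly
 * that identity with and/andc/or.
 */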
void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}